In [1]:
import pandas as pan
import numpy as np
import plotly.express as px
pays = "en Suisse romande"

Suisse romande

mots seuls

In [2]:
media1 = pan.read_csv("suisse-motsSeuls-media-nettoye.csv", low_memory=False, names=["mot","interactions"])
nonmedia1 = pan.read_csv("suisse-motsSeuls-nonmedia-nettoye.csv", low_memory=False, names=["mot","interactions"])
In [3]:
media1
Out[3]:
mot interactions
0 donner 49596
1 enfant 49596
2 envie 49596
3 intéresser 49596
4 biodiversité 49596
... ... ...
4393344 formulaire 0
4393345 ici 0
4393346 jardinière 0
4393347 recruter 0
4393348 rtn 0

4393349 rows × 2 columns

In [4]:
nonmedia1
Out[4]:
mot interactions
0 falloir 107933
1 il 107933
2 faire 107933
3 petit 107933
4 prince 107933
... ... ...
8890675 dal 0
8890676 tuo 0
8890677 background 0
8890678 maggiori 0
8890679 informazioni 0

8890680 rows × 2 columns

In [5]:
media1_table = pan.pivot_table(media1,index=["mot"],values=["mot","interactions"],aggfunc=[len,np.sum])
nonmedia1_table = pan.pivot_table(nonmedia1,index=["mot"],values=["mot","interactions"],aggfunc=[len,np.sum])
In [6]:
media1_table
Out[6]:
len sum
interactions interactions
mot
#12h45rts 2 200
#19h30rt 2 15
#coronaviru 1 325
#couleurslocales 2 111
#covid19 13 2360
... ... ...
수원 1 8
시즌 1 8
이동국 1 8
이동국의 1 8
전북 1 8

95205 rows × 2 columns

In [7]:
nonmedia1_table
Out[7]:
len sum
interactions interactions
mot
#111 1 5
#6 1 1
#7 1 36
#allezlausanne 1 105
#allezxamax 4 257
... ... ...
samedi 1 43
sea 3 30
sen 1 0
sonore 1 0
spontanes 2 0

217662 rows × 2 columns

In [8]:
media1_table["type"] = "media"
nonmedia1_table["type"] = "non-media"
In [9]:
media1_table
Out[9]:
len sum type
interactions interactions
mot
#12h45rts 2 200 media
#19h30rt 2 15 media
#coronaviru 1 325 media
#couleurslocales 2 111 media
#covid19 13 2360 media
... ... ... ...
수원 1 8 media
시즌 1 8 media
이동국 1 8 media
이동국의 1 8 media
전북 1 8 media

95205 rows × 3 columns

In [10]:
nonmedia1_table
Out[10]:
len sum type
interactions interactions
mot
#111 1 5 non-media
#6 1 1 non-media
#7 1 36 non-media
#allezlausanne 1 105 non-media
#allezxamax 4 257 non-media
... ... ... ...
samedi 1 43 non-media
sea 3 30 non-media
sen 1 0 non-media
sonore 1 0 non-media
spontanes 2 0 non-media

217662 rows × 3 columns

In [11]:
tableau1 = pan.concat([media1_table,nonmedia1_table], names = ["mot","nb","interactions","type"], levels=0)
In [12]:
tableau1
Out[12]:
len sum type
interactions interactions
mot
#12h45rts 2 200 media
#19h30rt 2 15 media
#coronaviru 1 325 media
#couleurslocales 2 111 media
#covid19 13 2360 media
... ... ... ...
samedi 1 43 non-media
sea 3 30 non-media
sen 1 0 non-media
sonore 1 0 non-media
spontanes 2 0 non-media

312867 rows × 3 columns

In [13]:
tableau1["mot"] = tableau1.index
tableau1.columns = ["nb","interactions","media","mot"]
tableau1.reset_index(drop=True, inplace=True)
tableau1
Out[13]:
nb interactions media mot
0 2 200 media #12h45rts
1 2 15 media #19h30rt
2 1 325 media #coronaviru
3 2 111 media #couleurslocales
4 13 2360 media #covid19
... ... ... ... ...
312862 1 43 non-media samedi
312863 3 30 non-media sea
312864 1 0 non-media sen
312865 1 0 non-media sonore
312866 2 0 non-media spontanes

312867 rows × 4 columns

In [14]:
khi2_1 = pan.pivot_table(tableau1,index=["mot"],columns=["media"],values=["mot","interactions"],aggfunc=[len,np.sum],fill_value=0)
In [15]:
khi2_1
Out[15]:
len sum
interactions nb interactions nb
media media non-media media non-media media non-media media non-media
mot
#111 0 1 0 1 0 5 0 1
#12h45rts 1 0 1 0 200 0 2 0
#19h30rt 1 0 1 0 15 0 2 0
#6 0 1 0 1 0 1 0 1
#7 0 1 0 1 0 36 0 1
... ... ... ... ... ... ... ... ...
samedi 0 1 0 1 0 43 0 1
sea 0 1 0 1 0 30 0 3
sen 0 1 0 1 0 0 0 1
sonore 0 1 0 1 0 0 0 1
spontanes 0 1 0 1 0 0 0 2

251466 rows × 8 columns

In [16]:
khi2_1["mot"] = khi2_1.index
khi2_1.columns = khi2_1.columns.get_level_values(0)
khi2_1.reset_index(drop=True, inplace=True)
khi2_1.columns = ["a","b","c","d","interactions_media","interactions_nonmedia","obs_media","obs_nonmedia","mot"]
khi2_1 = khi2_1.drop(columns=["a","b","c","d"])
khi2_1
Out[16]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot
0 0 5 0 1 #111
1 200 0 2 0 #12h45rts
2 15 0 2 0 #19h30rt
3 0 1 0 1 #6
4 0 36 0 1 #7
... ... ... ... ... ...
251461 0 43 0 1 samedi
251462 0 30 0 3 sea
251463 0 0 0 1 sen
251464 0 0 0 1 sonore
251465 0 0 0 2 spontanes

251466 rows × 5 columns

In [17]:
khi2_1.obs_media.sum(), khi2_1.obs_nonmedia.sum()
Out[17]:
(4393348, 8890588)
In [18]:
print("Nb de lignes fichier media1 = ", media1.shape[0])
print("Somme observée média khi2_1 = ", khi2_1.obs_media.sum())

print("Nb de lignes fichier nonmedia1 = ", nonmedia1.shape[0])
print("Somme observée nonmédia khi2_1 = ", khi2_1.obs_nonmedia.sum())
Nb de lignes fichier media1 =  4393349
Somme observée média khi2_1 =  4393348
Nb de lignes fichier nonmedia1 =  8890680
Somme observée nonmédia khi2_1 =  8890588
In [19]:
khi2_1["exp_media"] = ((khi2_1.obs_media + khi2_1.obs_nonmedia) * khi2_1.obs_media.sum()) / (khi2_1.obs_media.sum() + khi2_1.obs_nonmedia.sum())
khi2_1["exp_nonmedia"] = ((khi2_1.obs_media + khi2_1.obs_nonmedia) * khi2_1.obs_nonmedia.sum()) / (khi2_1.obs_media.sum() + khi2_1.obs_nonmedia.sum())
khi2_1
Out[19]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia
0 0 5 0 1 #111 0.330726 0.669274
1 200 0 2 0 #12h45rts 0.661453 1.338547
2 15 0 2 0 #19h30rt 0.661453 1.338547
3 0 1 0 1 #6 0.330726 0.669274
4 0 36 0 1 #7 0.330726 0.669274
... ... ... ... ... ... ... ...
251461 0 43 0 1 samedi 0.330726 0.669274
251462 0 30 0 3 sea 0.992179 2.007821
251463 0 0 0 1 sen 0.330726 0.669274
251464 0 0 0 1 sonore 0.330726 0.669274
251465 0 0 0 2 spontanes 0.661453 1.338547

251466 rows × 7 columns

In [20]:
khi2_1["res_media"] = (khi2_1.obs_media - khi2_1.exp_media) / np.sqrt(khi2_1.exp_media)
khi2_1["res_nonmedia"] = (khi2_1.obs_nonmedia - khi2_1.exp_nonmedia) / np.sqrt(khi2_1.exp_nonmedia)
khi2_1
Out[20]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia
0 0 5 0 1 #111 0.330726 0.669274 -0.575088 0.404266
1 200 0 2 0 #12h45rts 0.661453 1.338547 1.645827 -1.156956
2 15 0 2 0 #19h30rt 0.661453 1.338547 1.645827 -1.156956
3 0 1 0 1 #6 0.330726 0.669274 -0.575088 0.404266
4 0 36 0 1 #7 0.330726 0.669274 -0.575088 0.404266
... ... ... ... ... ... ... ... ... ...
251461 0 43 0 1 samedi 0.330726 0.669274 -0.575088 0.404266
251462 0 30 0 3 sea 0.992179 2.007821 -0.996082 0.700209
251463 0 0 0 1 sen 0.330726 0.669274 -0.575088 0.404266
251464 0 0 0 1 sonore 0.330726 0.669274 -0.575088 0.404266
251465 0 0 0 2 spontanes 0.661453 1.338547 -0.813297 0.571718

251466 rows × 9 columns

In [21]:
graph_media1 = khi2_1.sort_values(by=["res_media"],ascending=False).head(50)
graph_media1
Out[21]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia
63253 4149267 2064994 31611 23430 coronavirus 18203.510410 36837.489590 99.373342 -69.855798
214837 4796589 4058911 42891 37794 suisse 26684.657573 54000.342427 99.209713 -69.740773
49667 1406213 769170 12838 7239 canton 6639.993432 13437.006568 76.062082 -53.468841
97318 122000 97710 5358 1188 football 2164.934851 4381.065149 68.625475 -48.241180
100618 1393898 611575 10073 7190 fédéral 5709.329413 11553.670587 57.750987 -40.596815
231498 437696 350223 4106 1105 valaisan 1723.415141 3487.584859 57.392273 -40.344653
135419 94100 115024 4472 1425 league 1950.293434 3946.706566 57.101113 -40.139978
170201 976427 639521 8918 5954 pandémie 4918.562650 9953.437350 57.026904 -40.087812
98401 448226 184717 5528 2578 franc 2680.867996 5425.132004 54.988249 -38.654713
232384 424106 292245 5545 2759 vaudois 2746.351819 5557.648181 53.403561 -37.540736
64476 2208219 1380470 14422 13895 covid-19 9365.178763 18951.821237 52.254003 -36.732639
113513 133183 157688 4124 1686 hockey 1921.520239 3888.479761 50.244606 -35.320107
141459 844279 403020 7886 5850 lundi 4542.857488 9193.142512 49.600989 -34.867667
50752 1560407 1034103 8529 6639 cas 5016.457657 10151.542343 49.593298 -34.862261
248746 308359 166994 3828 1499 évolution 1761.779400 3565.220600 49.226723 -34.604572
40536 91758 46109 2713 643 bienne 1109.917715 2246.082285 48.118316 -33.825403
126744 169070 70334 3063 968 jurassien 1333.158018 2697.841982 47.376789 -33.304137
160297 616425 592349 8933 7563 neuchâtel 5455.662283 11040.337717 47.078556 -33.094490
192518 244069 88470 2058 242 restezchezvous 760.670663 1539.329337 47.038331 -33.066214
160311 278821 197860 4389 2255 neuchâtelois 2197.346036 4446.653964 46.754429 -32.866641
23798 44137 33569 2411 503 alpha 963.736657 1950.263343 46.619582 -32.771848
126703 243753 168092 3621 1530 jura 1703.571558 3447.428442 46.455696 -32.656643
39244 124746 62927 2385 505 bernois 955.799224 1934.200776 46.228511 -32.496940
138268 52101 59326 2857 926 ligue 1251.137877 2531.862123 45.399981 -31.914514
145302 647636 300044 6717 5172 mardi 3932.005873 7956.994127 44.413726 -31.221212
92024 708603 954021 8726 7781 face 5459.300273 11047.699727 44.212057 -31.079447
149179 694881 361135 7159 5780 mercredi 4279.268567 8659.731433 44.021741 -30.945662
39207 287615 290843 3313 1455 berne 1576.903356 3191.096644 43.719115 -30.732927
78918 530653 143424 2836 1057 décès 1287.517778 2605.482222 43.154849 -30.336269
154392 119643 19100 2009 399 morges 796.389111 1611.610889 42.969352 -30.205871
103351 1614616 1121151 13056 14228 genève 9023.538418 18260.461582 42.450402 -29.841068
59637 244884 172102 4686 3016 commune 2547.254541 5154.745459 42.376290 -29.788970
93595 149483 397870 5219 3633 fc 2927.589872 5924.410128 42.349444 -29.770098
73441 719051 772325 7999 7157 dimanche 5012.488941 10143.511059 42.182995 -29.653091
103175 561834 184280 5068 3488 genevois 2829.694865 5726.305135 42.077457 -29.578902
183502 564133 369689 6482 5235 président 3875.120937 7841.879063 41.877260 -29.438170
226850 314163 218315 3423 1740 trump 1707.540275 3455.459725 41.514045 -29.182844
125789 110511 287173 3729 2062 joueur 1915.236438 3875.763562 41.444745 -29.134129
151035 490438 797249 6534 5369 million 3936.636043 7966.363957 41.397125 -29.100653
27861 177332 33345 2121 590 app 896.599203 1814.400797 40.890689 -28.744648
89018 207327 32124 2525 967 etats-unis 1154.896502 2337.103498 40.316409 -28.340950
53102 58556 105197 2880 1305 championnat 1384.089880 2800.910120 40.209008 -28.265451
185693 425287 141646 3005 1450 quarantaine 1473.386001 2981.613999 39.901669 -28.049403
195021 473294 243486 4932 3638 romand 2834.325034 5735.674966 39.401556 -27.697842
61339 1434466 925400 13613 15817 conseil 9733.277218 19696.722782 39.325219 -27.644180
111511 106582 131096 2933 1423 hc 1440.644090 2915.355910 39.318239 -27.639273
220495 54529 26329 1650 330 tennis 654.838223 1325.161777 38.888991 -27.337528
26233 879275 614067 7538 7094 annoncer 4839.188320 9792.811680 38.795944 -27.272119
54033 209656 135217 3543 2131 chaux-de-fonds 1876.541452 3797.458548 38.469402 -27.042572
232888 743931 464749 7777 7502 vendredi 5053.168285 10225.831715 38.317608 -26.935867
In [22]:
fig = px.bar(graph_media1, x="res_media", y="mot", labels={"res_media":"Résiduel de Pearson","mot":"Lemme (dans les posts Facebook <b>médias</b>)"}, orientation="h", title="Lemmes les plus caractéristiques des posts Facebook <b>médias</b> {} en 2020".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="red", opacity=0.75, textfont_size=12)
fig
In [23]:
graph_nonmedia1 = khi2_1.sort_values(by=["res_nonmedia"],ascending=False).head(50)
graph_nonmedia1
Out[23]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia
174900 299280 7716498 3976 55597 photo 19702.362342 39870.637658 -112.038975 78.759271
78831 409877 2873301 5611 46373 découvrir 17192.479882 34791.520118 -88.327267 62.090814
114262 1757 30933 339 24059 horoscope 8069.062099 16328.937901 -86.054078 60.492846
222630 15728 4913467 428 20718 timeline 6993.539927 14152.460073 -78.509447 55.189248
206547 59933 221399 1264 22084 signe 7721.799405 15626.200595 -73.489500 51.660411
7200 465833 1856324 9397 43319 2020 17434.571588 35281.428412 -60.872230 42.790935
120529 57503 291927 708 13905 instagram 4832.904519 9780.095481 -59.334842 41.710208
24895 51410 1508070 708 13768 amour 4787.595006 9688.404994 -58.960119 41.446791
134072 31334 16163 37 8855 lanta 2940.818927 5951.181073 -53.547030 37.641589
130232 31562 16208 39 8795 koh 2921.636797 5912.363203 -53.330642 37.489476
92731 31997 192328 797 11779 fan 4159.214893 8416.785107 -52.133849 36.648175
64050 131934 315661 943 12228 couple 4355.997086 8815.002914 -51.712094 36.351697
99564 70893 2086825 1191 12900 from 4660.265351 9430.734649 -50.819712 35.724385
189082 47510 288848 1139 12649 recette 4560.055260 9227.944740 -50.661149 35.612921
221318 90292 714511 1196 12449 the 4512.761388 9132.238612 -49.373401 34.707682
201406 266828 2108996 3333 19568 savoir 7573.964716 15327.035284 -48.730742 34.255916
179062 195944 2173828 1893 14551 post 5438.464512 11005.535488 -48.076738 33.796175
94082 652920 1531120 4957 23640 femme 9457.782148 19139.217852 -46.279999 32.533134
248831 292681 2070334 3042 17399 être 6760.377833 13680.622167 -45.223902 31.790737
125994 168005 1082434 2821 16467 journée 6379.050322 12908.949678 -44.548601 31.316025
95268 292091 708656 997 10108 fille 3672.716395 7432.283605 -44.151594 31.036943
53810 151589 665568 522 8104 chat 2852.845711 5773.154289 -43.638959 30.676580
92355 1115645 4240036 11721 40260 faire 17191.487703 34789.512297 -41.722403 29.329312
54778 202932 760769 1037 9342 chien 3432.609047 6946.390953 -40.888744 28.743281
54634 13262 277547 653 7886 chf 2824.072517 5714.927483 -40.854171 28.718977
237433 193748 1629315 2696 14725 vous 5761.584180 11659.415820 -40.387065 28.390618
110243 5544 704174 37 4854 hallyday 1617.582700 3273.417300 -39.299224 27.625907
32433 0 7505 0 4457 auproux 1474.047454 2982.952546 -38.393326 26.989093
38238 148809 1942899 1969 11751 bel 4537.565866 9182.434134 -38.131091 26.804752
20882 7473 8241 37 4522 agathe 1507.781544 3051.218456 -37.877295 26.626343
24911 36456 468685 401 6126 amoureux 2158.651050 4368.348950 -37.830444 26.593408
11018 1469 768904 77 4473 3 1504.805007 3045.194993 -36.806861 25.873867
31057 4 11896 2 4067 astrologique 1345.725620 2723.274380 -36.629614 25.749269
92112 21440 270797 509 6213 facile 2223.142693 4498.857307 -36.354929 25.556176
25381 13880 274397 287 5114 and 1786.253152 3614.746848 -35.473453 24.936530
186609 23196 100911 133 4417 race 1504.805007 3045.194993 -35.363258 24.859067
231222 195676 491076 1886 10685 vacance 4157.561261 8413.438739 -35.229390 24.764963
50756 15381 17863 41 3874 casa 1294.793758 2620.206242 -34.843823 24.493924
170452 2513 14743 28 3795 papel 1264.366932 2558.633068 -34.770490 24.442374
107089 83816 317902 999 7580 gratuit 2837.301572 5741.698428 -34.511514 24.260322
245439 137 21480 12 3651 zodiaque 1211.450712 2451.549288 -34.461131 24.224906
165418 153631 511024 1103 7919 of 2983.813356 6038.186644 -34.431812 24.204295
183342 14226 275465 173 4394 prénom 1510.427355 3056.572645 -34.412821 24.190945
223150 48583 274699 248 4331 to 1514.396071 3064.603929 -32.542418 22.876121
197947 67202 159920 657 5902 réserve 2169.234294 4389.765706 -32.468771 22.824350
148453 163390 1051836 2541 11814 meilleur 4747.577114 9607.422886 -32.024554 22.512082
171247 98476 904647 1388 8253 partager 3188.532982 6452.467018 -31.886410 22.414971
177642 25458 91485 409 4843 poids 1736.974922 3515.025078 -31.863472 22.398847
234818 797121 3693372 5483 19793 vie 8359.439856 16916.560144 -31.460579 22.115628
2380 319608 1135228 3082 13221 10 5391.832093 10911.167907 -31.456633 22.112854
In [24]:
fig = px.bar(graph_nonmedia1, x="res_nonmedia", y="mot", labels={"res_nonmedia":"Résiduel de Pearson","mot":"Lemme (dans les posts Facebook <b>non-médias</b>)"}, orientation="h", title="Lemmes les plus caractéristiques des posts Facebook <b>non-médias</b> {} en 2020".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="darkorange", opacity=0.75, textfont_size=12)
fig
In [25]:
khi2_1["exp_inter_media"] = ((khi2_1.interactions_media + khi2_1.interactions_nonmedia) * khi2_1.interactions_media.sum()) / (khi2_1.interactions_media.sum() + khi2_1.interactions_nonmedia.sum())
khi2_1["exp_inter_nonmedia"] = ((khi2_1.interactions_media + khi2_1.interactions_nonmedia) * khi2_1.interactions_nonmedia.sum()) / (khi2_1.interactions_media.sum() + khi2_1.interactions_nonmedia.sum())
khi2_1
Out[25]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia
0 0 5 0 1 #111 0.330726 0.669274 -0.575088 0.404266 1.334422 3.665578
1 200 0 2 0 #12h45rts 0.661453 1.338547 1.645827 -1.156956 53.376886 146.623114
2 15 0 2 0 #19h30rt 0.661453 1.338547 1.645827 -1.156956 4.003266 10.996734
3 0 1 0 1 #6 0.330726 0.669274 -0.575088 0.404266 0.266884 0.733116
4 0 36 0 1 #7 0.330726 0.669274 -0.575088 0.404266 9.607839 26.392161
... ... ... ... ... ... ... ... ... ... ... ...
251461 0 43 0 1 samedi 0.330726 0.669274 -0.575088 0.404266 11.476030 31.523970
251462 0 30 0 3 sea 0.992179 2.007821 -0.996082 0.700209 8.006533 21.993467
251463 0 0 0 1 sen 0.330726 0.669274 -0.575088 0.404266 0.000000 0.000000
251464 0 0 0 1 sonore 0.330726 0.669274 -0.575088 0.404266 0.000000 0.000000
251465 0 0 0 2 spontanes 0.661453 1.338547 -0.813297 0.571718 0.000000 0.000000

251466 rows × 11 columns

In [26]:
khi2_1["res_inter_media"] = (khi2_1.interactions_media - khi2_1.exp_inter_media) / np.sqrt(khi2_1.exp_inter_media)
khi2_1["res_inter_nonmedia"] = (khi2_1.interactions_nonmedia - khi2_1.exp_inter_nonmedia) / np.sqrt(khi2_1.exp_inter_nonmedia)
khi2_1
Out[26]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
0 0 5 0 1 #111 0.330726 0.669274 -0.575088 0.404266 1.334422 3.665578 -1.155172 0.696983
1 200 0 2 0 #12h45rts 0.661453 1.338547 1.645827 -1.156956 53.376886 146.623114 20.069004 -12.108803
2 15 0 2 0 #19h30rt 0.661453 1.338547 1.645827 -1.156956 4.003266 10.996734 5.496123 -3.316132
3 0 1 0 1 #6 0.330726 0.669274 -0.575088 0.404266 0.266884 0.733116 -0.516609 0.311700
4 0 36 0 1 #7 0.330726 0.669274 -0.575088 0.404266 9.607839 26.392161 -3.099652 1.870201
... ... ... ... ... ... ... ... ... ... ... ... ... ...
251461 0 43 0 1 samedi 0.330726 0.669274 -0.575088 0.404266 11.476030 31.523970 -3.387629 2.043955
251462 0 30 0 3 sea 0.992179 2.007821 -0.996082 0.700209 8.006533 21.993467 -2.829582 1.707252
251463 0 0 0 1 sen 0.330726 0.669274 -0.575088 0.404266 0.000000 0.000000 NaN NaN
251464 0 0 0 1 sonore 0.330726 0.669274 -0.575088 0.404266 0.000000 0.000000 NaN NaN
251465 0 0 0 2 spontanes 0.661453 1.338547 -0.813297 0.571718 0.000000 0.000000 NaN NaN

251466 rows × 13 columns

In [27]:
graph_interactions_media1 = khi2_1.sort_values(by=["res_inter_media"],ascending=False).head(50)
graph_interactions_media1
Out[27]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
63253 4149267 2064994 31611 23430 coronavirus 18203.510410 36837.489590 99.373342 -69.855798 1.658489e+06 4.555772e+06 1934.098426 -1166.954619
214837 4796589 4058911 42891 37794 suisse 26684.657573 54000.342427 99.209713 -69.740773 2.363395e+06 6.492105e+06 1582.736305 -954.957317
64476 2208219 1380470 14422 13895 covid-19 9365.178763 18951.821237 52.254003 -36.732639 9.577652e+05 2.630924e+06 1277.727129 -770.927454
100618 1393898 611575 10073 7190 fédéral 5709.329413 11553.670587 57.750987 -40.596815 5.352295e+05 1.470243e+06 1173.695651 -708.159183
49667 1406213 769170 12838 7239 canton 6639.993432 13437.006568 76.062082 -53.468841 5.805758e+05 1.594807e+06 1083.576913 -653.785281
50752 1560407 1034103 8529 6639 cas 5016.457657 10151.542343 49.593298 -34.862261 6.924343e+05 1.902076e+06 1043.077947 -629.349888
103351 1614616 1121151 13056 14228 genève 9023.538418 18260.461582 42.450402 -29.841068 7.301336e+05 2.005633e+06 1035.113607 -624.544537
61339 1434466 925400 13613 15817 conseil 9733.277218 19696.722782 39.325219 -27.644180 6.298115e+05 1.730055e+06 1013.921099 -611.757858
191715 572057 103322 2368 1080 reportage 1140.344541 2307.655459 36.354523 -25.555890 1.802481e+05 4.951309e+05 922.866454 -556.819269
141459 844279 403020 7886 5850 lundi 4542.857488 9193.142512 49.600989 -34.867667 3.328847e+05 9.144143e+05 886.357643 -534.791370
149526 1310710 1091592 8975 9332 mesure 6054.607748 12252.392252 37.531695 -26.383399 6.411370e+05 1.761165e+06 836.223783 -504.542682
170201 976427 639521 8918 5954 pandémie 4918.562650 9953.437350 57.026904 -40.087812 4.312714e+05 1.184677e+06 830.128265 -500.864900
116002 930148 582889 4595 3870 hôpital 2799.598765 5665.401235 33.932325 -23.853174 4.038060e+05 1.109231e+06 828.288480 -499.754850
78918 530653 143424 2836 1057 décès 1287.517778 2605.482222 43.154849 -30.336269 1.799007e+05 4.941763e+05 826.959443 -498.952964
146403 1015564 695958 4926 5810 masque 3550.678363 7185.321637 23.080680 -16.224868 4.567786e+05 1.254743e+06 826.784282 -498.847279
103175 561834 184280 5068 3488 genevois 2829.694865 5726.305135 42.077457 -29.578902 1.991262e+05 5.469878e+05 812.816801 -490.419882
231492 918817 612192 6099 5261 valais 3757.051621 7602.948379 38.207950 -26.858781 4.086025e+05 1.122407e+06 798.182752 -481.590304
145302 647636 300044 6717 5172 mardi 3932.005873 7956.994127 44.413726 -31.221212 2.529210e+05 6.947590e+05 784.858048 -473.550732
149179 694881 361135 7159 5780 mercredi 4279.268567 8659.731433 44.021741 -30.945662 2.818342e+05 7.741818e+05 778.040790 -469.437482
12040 306092 7468 85 61 369 48.286051 97.713949 5.283482 -3.714093 8.368428e+04 2.298757e+05 768.825730 -463.877497
26233 879275 614067 7538 7094 annoncer 4839.188320 9792.811680 38.795944 -27.272119 3.985497e+05 1.094792e+06 761.475083 -459.442422
163130 1877064 2214889 17062 27093 nouveau 14603.223091 29551.776909 20.346737 -14.303006 1.092079e+06 2.999874e+06 751.163845 -453.221050
232888 743931 464749 7777 7502 vendredi 5053.168285 10225.831715 38.317608 -26.935867 3.225779e+05 8.861021e+05 741.871926 -447.614692
185693 425287 141646 3005 1450 quarantaine 1473.386001 2981.613999 39.901669 -28.049403 1.513056e+05 4.156274e+05 704.358257 -424.980503
200079 810826 612714 8141 8666 samedi 5558.518186 11248.481814 34.638395 -24.349515 3.799207e+05 1.043619e+06 699.093752 -421.804119
196090 399170 129226 2453 2352 rts 1589.140232 3215.859768 21.670161 -15.233325 1.410207e+05 3.873753e+05 687.431822 -414.767796
98401 448226 184717 5528 2578 franc 2680.867996 5425.132004 54.988249 -38.654713 1.689226e+05 4.640204e+05 679.566960 -410.022464
124912 912074 790701 6807 11154 jeune 5940.176423 12020.823577 11.246854 -7.906124 4.544441e+05 1.248331e+06 678.850449 -409.590152
78923 359895 107364 2263 941 décéder 1059.647306 2144.352694 36.966835 -25.986323 1.247042e+05 3.425548e+05 666.008794 -401.842031
73775 590941 364820 5490 4893 direct 3433.931952 6949.068048 35.086629 -24.664607 2.550777e+05 7.006833e+05 665.007040 -401.237614
173822 1431598 1697788 9831 17126 personne 8915.390893 18041.609107 9.697056 -6.816673 8.351844e+05 2.294202e+06 652.613954 -393.760141
120928 380152 138639 858 725 intensif 523.539852 1059.460148 14.617379 -10.275479 1.384572e+05 3.803338e+05 649.545234 -391.908603
195021 473294 243486 4932 3638 romand 2834.325034 5735.674966 39.401556 -27.697842 1.912974e+05 5.254826e+05 644.746929 -389.013505
227224 210167 2483 274 36 ttc 102.525176 207.474824 16.934996 -11.904678 5.675297e+04 1.558970e+05 643.977593 -388.549319
94314 544054 330145 2799 2936 fermer 1896.715761 3838.284239 20.717734 -14.563803 2.333101e+05 6.408889e+05 643.333016 -388.160409
165668 323774 90897 1650 967 ofsp 865.510924 1751.489076 26.665555 -18.744902 1.106692e+05 3.040018e+05 640.589351 -386.504995
124880 603004 415228 6410 6085 jeudi 4132.426057 8362.573943 35.429901 -24.905915 2.717503e+05 7.464817e+05 635.441821 -383.399189
183502 564133 369689 6482 5235 président 3875.120937 7841.879063 41.877260 -29.438170 2.492226e+05 6.845994e+05 630.802495 -380.600013
164720 372285 151453 1565 2384 obligatoire 1306.038455 2642.961545 7.165678 -5.037208 1.397775e+05 3.839605e+05 621.896731 -375.226645
184074 871883 830637 7570 9067 public 5502.294702 11134.705298 27.875118 -19.595181 4.543761e+05 1.248144e+06 619.378251 -373.707098
33287 502293 313249 3246 2481 autorité 1894.069950 3832.930050 31.063917 -21.836789 2.176555e+05 5.978865e+05 610.108944 -368.114384
196104 188172 3914 313 15 rtsavecvous 108.478251 219.521749 19.636684 -13.803866 5.126476e+04 1.408212e+05 604.667927 -364.831500
89003 498105 316446 4935 4328 etat 3063.518412 6199.481588 33.812347 -23.768834 2.173910e+05 5.971600e+05 602.064953 -363.260974
25153 2087726 3109971 19230 31390 an 16741.369106 33878.630894 19.233796 -13.520650 1.387184e+06 3.810513e+06 594.794351 -358.874195
65290 712021 629921 9105 9634 crise 6197.481543 12541.518457 36.933023 -25.962554 3.581434e+05 9.837986e+05 591.322702 -356.779546
177866 751028 689665 4245 3516 police 2566.767397 5194.232603 33.125230 -23.285816 3.844985e+05 1.056194e+06 591.100817 -356.645670
6380 172467 1352 793 23 19h30rts 269.872722 546.127278 31.844019 -22.385172 4.638958e+04 1.274294e+05 585.365410 -353.185164
163242 183003 9753 391 45 nouvo 144.196700 291.803300 20.552909 -14.447938 5.144357e+04 1.413124e+05 580.037933 -349.970785
133797 495278 344297 5699 4732 lancer 3449.806818 6981.193182 38.293880 -26.919186 2.240695e+05 6.155055e+05 572.943687 -345.690412
208647 431379 264286 3224 4850 société 2670.284752 5403.715248 10.715383 -7.532519 1.856622e+05 5.100028e+05 570.260425 -344.071443
In [28]:
fig = px.bar(graph_interactions_media1, x="res_inter_media", y="mot", labels={"res_inter_media":"Résiduel de Pearson (<b>en fonction des interactions</b>)","mot":"Lemme (dans les posts Facebook <b>médias</b>)"}, orientation="h", title="Lemmes les plus caractéristiques des posts Facebook <b>médias</b> {} en 2020<br>(en fonction des interactions suscitées)".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="gold", opacity=0.75, textfont_size=12)
fig
In [29]:
graph_interactions_nonmedia1 = khi2_1.sort_values(by=["res_inter_nonmedia"],ascending=False).head(50)
graph_interactions_nonmedia1
Out[29]:
interactions_media interactions_nonmedia obs_media obs_nonmedia mot exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
174900 299280 7716498 3976 55597 photo 19702.362342 39870.637658 -112.038975 78.759271 2.139286e+06 5.876492e+06 -1258.012219 759.032297
222630 15728 4913467 428 20718 timeline 6993.539927 14152.460073 -78.509447 55.189248 1.315525e+06 3.613670e+06 -1133.250825 683.756456
217787 170705 3092510 3153 12351 série 5127.581719 10376.418281 -27.575209 19.384356 8.709013e+05 2.392314e+06 -750.300665 452.700243
73075 14190 2196616 237 3878 dieu 1360.939033 2754.060967 -30.466560 21.416869 5.900297e+05 1.620776e+06 -749.660564 452.314032
65945 43053 2322400 1053 1708 crédit 913.135522 1847.864478 4.628495 -3.253662 6.313026e+05 1.734150e+06 -740.359820 446.702350
99564 70893 2086825 1191 12900 from 4660.265351 9430.734649 -50.819712 35.724385 5.758613e+05 1.581857e+06 -665.434305 401.495408
22946 0 1568544 0 2720 alffr 899.575740 1820.424260 -29.992928 21.083923 4.186200e+05 1.149924e+06 -647.008477 390.378029
210670 4497 1572818 19 2014 souviens 672.366721 1360.633279 -25.197294 17.712769 4.209608e+05 1.156354e+06 -641.883831 387.286031
227730 49609 1674101 714 3513 tv 1397.980387 2829.019613 -18.293344 12.859547 4.600314e+05 1.263679e+06 -605.114129 365.100720
117418 238388 2477715 2116 6755 image 2933.873673 5937.126327 -15.099603 10.614464 7.248856e+05 1.991217e+06 -571.407537 344.763563
24895 51410 1508070 708 13768 amour 4787.595006 9688.404994 -58.960119 41.446791 4.162009e+05 1.143279e+06 -565.447786 341.167697
179062 195944 2173828 1893 14551 post 5438.464512 11005.535488 -48.076738 33.796175 6.324552e+05 1.737317e+06 -548.883958 331.173772
38238 148809 1942899 1969 11751 bel 4537.565866 9182.434134 -38.131091 26.804752 5.582443e+05 1.533464e+06 -547.990628 330.634774
202576 4183 1065822 155 1469 schweiz 537.099633 1086.900367 -16.487287 11.589955 2.855677e+05 7.844373e+05 -526.557638 317.702998
71798 61140 1354898 869 3227 dessin 1354.655232 2741.344768 -13.195130 9.275690 3.779185e+05 1.038120e+06 -515.295816 310.908083
216926 15744 1072319 267 2735 switzerland 992.840578 2009.159422 -23.035704 16.193251 2.903871e+05 7.976759e+05 -509.659366 307.507283
190096 24114 1091458 372 3153 regarder 1165.810472 2359.189528 -23.248923 16.343137 2.977288e+05 8.178432e+05 -501.451758 302.555153
78831 409877 2873301 5611 46373 découvrir 17192.479882 34791.520118 -88.327267 62.090814 8.762291e+05 2.406949e+06 -498.201569 300.594124
109410 6991 924771 75 1194 générique 419.691770 849.308230 -16.825411 11.827644 2.486728e+05 6.830892e+05 -484.651740 292.418720
210644 48791 1161140 886 2912 souvenir 1256.098773 2541.901227 -10.442530 7.340714 3.229117e+05 8.870193e+05 -482.391872 291.055210
201406 266828 2108996 3333 19568 savoir 7573.964716 15327.035284 -48.730742 34.255916 6.340704e+05 1.741754e+06 -461.194483 278.265586
191137 6282 828420 93 1334 renaud 471.946537 955.053463 -17.443416 12.262079 2.227690e+05 6.119330e+05 -458.674303 276.745015
153697 190183 1748089 1568 2376 montagne 1304.384823 2639.615177 7.299070 -5.130977 5.172946e+05 1.420977e+06 -454.806840 274.411548
89100 2038 783724 6 762 eternel 253.997856 514.002144 -15.560835 10.938694 2.097076e+05 5.760544e+05 -453.488091 273.615869
57464 513 763227 15 915 clique 307.575529 622.424471 -16.682537 11.727209 2.038303e+05 5.599097e+05 -450.339433 271.716100
11018 1469 768904 77 4473 3 1504.805007 3045.194993 -36.806861 25.873867 2.056006e+05 5.647724e+05 -450.192241 271.627291
16018 80295 1198533 910 3646 80 1506.789365 3049.210635 -15.374281 10.807553 3.412993e+05 9.375287e+05 -446.765824 269.559933
143287 25081 891252 374 4106 magnifique 1481.654160 2998.345840 -28.776022 20.228483 2.445550e+05 6.717780e+05 -443.807683 267.775113
37717 17331 837722 365 2182 beauté 842.360077 1704.639923 -16.447398 11.561915 2.282003e+05 6.268527e+05 -441.423333 266.336496
183743 0 726335 0 659 psaume 217.948681 441.051319 -14.763085 10.377905 1.938475e+05 5.324875e+05 -440.281162 265.647357
216399 403 726150 18 594 svizzera 202.404542 409.595458 -12.961686 9.111587 1.939057e+05 5.326473e+05 -439.432043 265.135034
157113 36758 937320 437 1571 mythique 664.098561 1343.901439 -8.812479 6.194848 2.599663e+05 7.141117e+05 -437.775808 264.135731
216943 0 714137 0 414 switzerlandpicture 136.920719 277.079281 -11.701313 8.225592 1.905920e+05 5.235450e+05 -436.568489 263.407285
132945 1458 703460 5 395 lacleman 132.290550 267.709450 -11.067047 7.779726 1.881316e+05 5.167864e+05 -430.379992 259.673403
125425 7511 729543 90 1838 johnny 637.640451 1290.359549 -21.687405 15.245447 1.967082e+05 5.403458e+05 -426.582972 257.382439
248831 292681 2070334 3042 17399 être 6760.377833 13680.622167 -45.223902 31.790737 6.306519e+05 1.732363e+06 -425.583187 256.779210
174128 589430 3220373 5981 18285 petit 8025.406218 16240.593782 -22.820948 16.042286 1.016777e+06 2.793026e+06 -423.806761 255.707389
110243 5544 704174 37 4854 hallyday 1617.582700 3273.417300 -39.299224 27.625907 1.894127e+05 5.203053e+05 -422.477162 254.905164
33925 200723 1662537 2358 10967 avoir 4406.928948 8918.071052 -30.864474 21.696588 4.972751e+05 1.365985e+06 -420.535465 253.733625
237433 193748 1629315 2696 14725 vous 5761.584180 11659.415820 -40.387065 28.390618 4.865471e+05 1.336516e+06 -419.766061 253.269398
157097 25 656440 2 430 myswitzerland 142.873794 289.126206 -11.785660 8.284885 1.752003e+05 4.812647e+05 -418.509606 252.511305
25987 13399 736164 238 1836 animer 685.926502 1388.073498 -17.102830 12.022659 2.000467e+05 5.495163e+05 -417.308219 251.786438
190095 13655 736469 87 1078 regarde 385.296227 779.703773 -15.196738 10.682747 2.001964e+05 5.499276e+05 -416.914614 251.548954
43231 639731 3370363 6748 20734 bon 9089.022240 18392.977760 -24.555397 17.261539 1.070232e+06 2.939862e+06 -416.135641 251.078954
163636 16910 749257 169 246 nuage 137.251446 277.748554 2.709976 -1.905013 2.044780e+05 5.616890e+05 -414.796904 250.271215
226889 562 611242 28 265 trust 96.902828 196.097172 -6.999529 4.920411 1.632810e+05 4.485230e+05 -402.689577 242.966157
116815 153 608039 6 272 ig 91.941932 186.058068 -8.962895 6.300585 1.623170e+05 4.458750e+05 -402.506048 242.855423
55360 72844 1007386 1211 7006 chose 2717.578624 5499.421376 -28.900193 20.315771 2.882966e+05 7.919334e+05 -401.265607 242.106993
203722 4808 615966 45 876 seigneur 304.598991 616.401009 -14.874377 10.456139 1.656749e+05 4.550991e+05 -395.219491 238.459018
209021 65868 946456 861 2552 soleil 1128.769118 2284.230882 -7.969993 5.602611 2.701735e+05 7.421505e+05 -393.059865 237.155989
In [30]:
fig = px.bar(graph_interactions_nonmedia1, x="res_inter_nonmedia", y="mot", labels={"res_inter_nonmedia":"Résiduel de Pearson (<b>en fonction des interactions</b>)","mot":"Lemme (dans les posts Facebook <b>non-médias</b>)"}, orientation="h", title="Lemmes les plus caractéristiques des posts Facebook <b>non-médias</b> {} en 2020<br>(en fonction des interactions suscitées)".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="lightyellow", opacity=0.75, textfont_size=12)
fig

bigrammes

In [31]:
media2 = pan.read_csv("suisse-bigrammes-media-nettoye.csv", low_memory=False, names=["bigramme","interactions"])
nonmedia2 = pan.read_csv("suisse-bigrammes-nonmedia-nettoye.csv", low_memory=False, names=["bigramme","interactions"])
In [32]:
media2
Out[32]:
bigramme interactions
0 donner enfant 49596
1 enfant envie 49596
2 envie intéresser 49596
3 intéresser biodiversité 49596
4 biodiversité falloir 49596
... ... ...
4242484 complétez formulaire 0
4242485 formulaire ici 0
4242486 ici jardinière 0
4242487 jardinière recruter 0
4242488 recruter rtn 0

4242489 rows × 2 columns

In [33]:
nonmedia2
Out[33]:
bigramme interactions
0 falloir il 107933
1 il faire 107933
2 faire petit 107933
3 petit prince 107933
4 prince falloir 107933
... ... ...
8612244 indipendentemente dal 0
8612245 dal tuo 0
8612246 tuo background 0
8612247 background maggiori 0
8612248 maggiori informazioni 0

8612249 rows × 2 columns

In [34]:
media2_table = pan.pivot_table(media2,index=["bigramme"],values=["bigramme","interactions"],aggfunc=[len,np.sum])
nonmedia2_table = pan.pivot_table(nonmedia2,index=["bigramme"],values=["bigramme","interactions"],aggfunc=[len,np.sum])
In [35]:
media2_table
Out[35]:
len sum
interactions interactions
bigramme
#12h45rts chine 1 179
#19h30rt crèches 1 5
#19h30rt rtsinfo 1 10
#coronaviru déclarer 1 325
#couleurslocales romandise 1 74
... ... ...
수원 이동국 1 8
시즌 k리그 1 8
이동국 덕분에챌린지 1 8
이동국의 시즌 1 8
전북 수원 1 8

1673277 rows × 2 columns

In [36]:
nonmedia2_table
Out[36]:
len sum
interactions interactions
bigramme
#111 disposition 1 5
#6 ski 1 1
#7 surf 1 36
#allezlausanne lswil 1 105
#allezxamax mafamille 1 48
... ... ...
samedi cavelescretet 1 43
sea dahu 3 30
sen ira 1 0
sonore des 1 0
spontanes cie 2 0

2796740 rows × 2 columns

In [37]:
media2_table["type"] = "media"
nonmedia2_table["type"] = "non-media"
In [38]:
media2_table
Out[38]:
len sum type
interactions interactions
bigramme
#12h45rts chine 1 179 media
#19h30rt crèches 1 5 media
#19h30rt rtsinfo 1 10 media
#coronaviru déclarer 1 325 media
#couleurslocales romandise 1 74 media
... ... ... ...
수원 이동국 1 8 media
시즌 k리그 1 8 media
이동국 덕분에챌린지 1 8 media
이동국의 시즌 1 8 media
전북 수원 1 8 media

1673277 rows × 3 columns

In [39]:
nonmedia2_table
Out[39]:
len sum type
interactions interactions
bigramme
#111 disposition 1 5 non-media
#6 ski 1 1 non-media
#7 surf 1 36 non-media
#allezlausanne lswil 1 105 non-media
#allezxamax mafamille 1 48 non-media
... ... ... ...
samedi cavelescretet 1 43 non-media
sea dahu 3 30 non-media
sen ira 1 0 non-media
sonore des 1 0 non-media
spontanes cie 2 0 non-media

2796740 rows × 3 columns

In [40]:
tableau2 = pan.concat([media2_table,nonmedia2_table], names = ["bigramme","nb","interactions","type"], levels=0)
In [41]:
tableau2
Out[41]:
len sum type
interactions interactions
bigramme
#12h45rts chine 1 179 media
#19h30rt crèches 1 5 media
#19h30rt rtsinfo 1 10 media
#coronaviru déclarer 1 325 media
#couleurslocales romandise 1 74 media
... ... ... ...
samedi cavelescretet 1 43 non-media
sea dahu 3 30 non-media
sen ira 1 0 non-media
sonore des 1 0 non-media
spontanes cie 2 0 non-media

4470017 rows × 3 columns

In [42]:
tableau2["bigramme"] = tableau2.index
tableau2.columns = ["nb","interactions","media","bigramme"]
tableau2.reset_index(drop=True, inplace=True)
tableau2
Out[42]:
nb interactions media bigramme
0 1 179 media #12h45rts chine
1 1 5 media #19h30rt crèches
2 1 10 media #19h30rt rtsinfo
3 1 325 media #coronaviru déclarer
4 1 74 media #couleurslocales romandise
... ... ... ... ...
4470012 1 43 non-media samedi cavelescretet
4470013 3 30 non-media sea dahu
4470014 1 0 non-media sen ira
4470015 1 0 non-media sonore des
4470016 2 0 non-media spontanes cie

4470017 rows × 4 columns

In [43]:
khi2_2 = pan.pivot_table(tableau2,index=["bigramme"],columns=["media"],values=["bigramme","interactions"],aggfunc=[len,np.sum],fill_value=0)
In [44]:
khi2_2
Out[44]:
len sum
interactions nb interactions nb
media media non-media media non-media media non-media media non-media
bigramme
#111 disposition 0 1 0 1 0 5 0 1
#12h45rts chine 1 0 1 0 179 0 1 0
#19h30rt crèches 1 0 1 0 5 0 1 0
#19h30rt rtsinfo 1 0 1 0 10 0 1 0
#6 ski 0 1 0 1 0 1 0 1
... ... ... ... ... ... ... ... ...
samedi cavelescretet 0 1 0 1 0 43 0 1
sea dahu 0 1 0 1 0 30 0 3
sen ira 0 1 0 1 0 0 0 1
sonore des 0 1 0 1 0 0 0 1
spontanes cie 0 1 0 1 0 0 0 2

4023862 rows × 8 columns

In [45]:
khi2_2["bigramme"] = khi2_2.index
khi2_2.columns = khi2_2.columns.get_level_values(0)
khi2_2.reset_index(drop=True, inplace=True)
khi2_2.columns = ["a","b","c","d","interactions_media","interactions_nonmedia","obs_media","obs_nonmedia","bigramme"]
khi2_2 = khi2_2.drop(columns=["a","b","c","d"])
khi2_2
Out[45]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme
0 0 5 0 1 #111 disposition
1 179 0 1 0 #12h45rts chine
2 5 0 1 0 #19h30rt crèches
3 10 0 1 0 #19h30rt rtsinfo
4 0 1 0 1 #6 ski
... ... ... ... ... ...
4023857 0 43 0 1 samedi cavelescretet
4023858 0 30 0 3 sea dahu
4023859 0 0 0 1 sen ira
4023860 0 0 0 1 sonore des
4023861 0 0 0 2 spontanes cie

4023862 rows × 5 columns

In [46]:
khi2_2.obs_media.sum(), khi2_2.obs_nonmedia.sum()
Out[46]:
(4242489, 8612249)
In [47]:
print("Nb de lignes fichier media2 = ", media2.shape[0])
print("Somme observée média khi2_2 = ", khi2_2.obs_media.sum())

print("Nb de lignes  fichier nonmedia2 = ", nonmedia2.shape[0])
print("Somme observée nonmédia khi2_2 = ", khi2_2.obs_nonmedia.sum())
Nb de lignes fichier media2 =  4242489
Somme observée média khi2_2 =  4242489
Nb de lignes  fichier nonmedia2 =  8612249
Somme observée nonmédia khi2_2 =  8612249
In [48]:
khi2_2["exp_media"] = ((khi2_2.obs_media + khi2_2.obs_nonmedia) * khi2_2.obs_media.sum()) / (khi2_2.obs_media.sum() +khi2_2.obs_nonmedia.sum())
khi2_2["exp_nonmedia"] = ((khi2_2.obs_media + khi2_2.obs_nonmedia) * khi2_2.obs_nonmedia.sum()) / (khi2_2.obs_media.sum() +khi2_2.obs_nonmedia.sum())
khi2_2
Out[48]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia
0 0 5 0 1 #111 disposition 0.330033 0.669967
1 179 0 1 0 #12h45rts chine 0.330033 0.669967
2 5 0 1 0 #19h30rt crèches 0.330033 0.669967
3 10 0 1 0 #19h30rt rtsinfo 0.330033 0.669967
4 0 1 0 1 #6 ski 0.330033 0.669967
... ... ... ... ... ... ... ...
4023857 0 43 0 1 samedi cavelescretet 0.330033 0.669967
4023858 0 30 0 3 sea dahu 0.990099 2.009901
4023859 0 0 0 1 sen ira 0.330033 0.669967
4023860 0 0 0 1 sonore des 0.330033 0.669967
4023861 0 0 0 2 spontanes cie 0.660066 1.339934

4023862 rows × 7 columns

In [49]:
khi2_2["res_media"] = (khi2_2.obs_media - khi2_2.exp_media) / np.sqrt(khi2_2.exp_media)
khi2_2["res_nonmedia"] = (khi2_2.obs_nonmedia - khi2_2.exp_nonmedia) / np.sqrt(khi2_2.exp_nonmedia)
khi2_2
Out[49]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia res_media res_nonmedia
0 0 5 0 1 #111 disposition 0.330033 0.669967 -0.574485 0.403210
1 179 0 1 0 #12h45rts chine 0.330033 0.669967 1.166204 -0.818515
2 5 0 1 0 #19h30rt crèches 0.330033 0.669967 1.166204 -0.818515
3 10 0 1 0 #19h30rt rtsinfo 0.330033 0.669967 1.166204 -0.818515
4 0 1 0 1 #6 ski 0.330033 0.669967 -0.574485 0.403210
... ... ... ... ... ... ... ... ... ...
4023857 0 43 0 1 samedi cavelescretet 0.330033 0.669967 -0.574485 0.403210
4023858 0 30 0 3 sea dahu 0.990099 2.009901 -0.995037 0.698379
4023859 0 0 0 1 sen ira 0.330033 0.669967 -0.574485 0.403210
4023860 0 0 0 1 sonore des 0.330033 0.669967 -0.574485 0.403210
4023861 0 0 0 2 spontanes cie 0.660066 1.339934 -0.812445 0.570224

4023862 rows × 9 columns

In [50]:
graph_media2 = khi2_2.sort_values(by=["res_media"],ascending=False).head(50)
graph_media2
Out[50]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia res_media res_nonmedia
3471275 243252 6291 3765 124 suisse monde 1283.498716 2605.501284 69.265470 -48.614840
949395 392132 6995 3530 133 coronavirus suisse 1208.911236 2454.088764 66.756651 -46.853994
4010276 185290 408 2250 20 évolution coronavirus 749.175131 1520.824869 54.832537 -38.484905
642364 43477 16896 2392 374 canal alpha 912.871548 1853.128452 48.955451 -34.359998
3342541 159429 1719 1786 14 site app 594.059576 1205.940424 48.903456 -34.323504
338814 134019 37 1464 2 app évolution 483.828521 982.171479 44.561144 -31.275798
3094039 118892 666 1448 5 restezchezvous coronavirus 479.538091 973.461909 44.225321 -31.040097
3478120 181028 5939 1857 294 suivre direct 709.901193 1441.098807 43.052847 -30.217181
1761699 34516 8125 1502 126 hockey glace 537.293883 1090.706117 41.618755 -29.210646
1112001 129332 1473 1325 33 direct site 448.184946 909.815054 41.417069 -29.069090
900227 814330 271088 4713 3257 conseil fédéral 2630.363787 5339.636213 40.607430 -28.500835
2301805 110638 34678 2200 686 million franc 952.475519 1933.524481 40.422407 -28.370974
2480943 386653 20836 1613 265 nouveau cas 619.802157 1258.197843 39.894151 -28.000211
1461283 87414 115840 1928 966 fc sion 955.115784 1938.884216 31.479886 -22.094553
95344 275062 60164 1282 423 24 heure 562.706431 1142.293569 30.322516 -21.282239
2004782 29725 5413 799 77 jura bernois 289.108993 586.891007 29.987934 -21.047408
2217156 48313 14078 881 135 mardi soir 335.313627 680.686373 29.800085 -20.915564
2425935 21570 16317 924 169 national league 360.726176 732.273824 29.657241 -20.815307
3482191 26040 25202 1051 265 super league 434.323556 881.676444 29.590388 -20.768386
3102718 41001 12169 934 181 retrouver ici 367.986904 747.013096 29.505997 -20.709155
241871 227955 70029 973 225 alain berset 395.379651 802.620349 29.049277 -20.388600
3350212 41818 3756 767 86 ski alpin 281.518232 571.481768 28.934745 -20.308214
962868 39281 15604 993 246 coupe monde 408.911008 830.088992 28.884484 -20.272938
1148401 184281 52439 1924 1136 donald trump 1009.901278 2050.098722 28.764289 -20.188577
237383 5952 1441 639 20 air temps 217.491811 441.508189 28.581470 -20.060264
2092813 110377 27516 1026 288 lier coronavirus 433.663490 880.336510 28.444092 -19.963843
982615 70332 5249 700 72 covid-19 suisse 254.785551 517.214449 27.892141 -19.576449
2335608 22272 7143 706 78 monde coronavirus 258.745948 525.254052 27.804655 -19.515046
2444679 30942 22590 918 231 neuchâtel xamax 379.208029 769.791971 27.668292 -19.419338
2983365 18751 4440 775 128 radio lac 298.019887 604.980113 27.629794 -19.392318
2750359 17258 4764 655 59 play off 235.643632 478.356368 27.318408 -19.173768
1974336 117232 14779 973 290 joe biden 416.831802 846.168198 27.241168 -19.119556
2354798 20730 420 541 2 montreux mymontreuxch 179.207972 363.792028 27.025910 -18.968474
2451144 20335 0 536 0 news commune 176.897740 359.102260 26.999579 -18.949994
3617039 16214 3533 650 65 tour france 235.973665 479.026335 26.952323 -18.916826
650614 176976 67851 1420 738 canton vaud 712.211424 1445.788576 26.521561 -18.614491
838918 21668 1496 554 29 commune montreux 192.409296 390.590704 26.067791 -18.296006
3757896 63374 15485 771 198 vendredi soir 319.802072 649.197928 25.230535 -17.708368
900128 213881 101114 1968 1461 conseil etat 1131.683491 2297.316509 24.860404 -17.448587
962994 46928 25888 868 290 coupe suisse 382.178327 775.821673 24.850999 -17.441986
2547508 16595 242 471 11 open australie 159.075953 322.924047 24.731280 -17.357959
3986113 23367 9984 625 107 équipe suisse 241.584227 490.415773 24.668099 -17.313615
4010287 14902 255 441 7 évolution covid-19 147.854828 300.145172 24.108211 -16.920651
1673063 1849 1721 459 17 grand étape 157.095754 318.904246 24.087238 -16.905930
2528375 223949 64841 1182 620 office fédéral 594.719642 1207.280358 24.081822 -16.902129
2596245 129779 94033 1602 1084 pandémie coronavirus 886.468900 1799.531100 24.032379 -16.867427
3573796 153529 62319 1136 585 tester positif 567.986961 1153.013039 23.833571 -16.727891
2920496 77687 8712 653 151 président américain 265.346610 538.653390 23.797800 -16.702785
948060 25314 10051 542 74 coronavirus nouveau 203.300388 412.699612 23.754484 -16.672383
2983378 14534 453 429 8 radio lfm 144.224464 292.775536 23.712820 -16.643141
In [51]:
fig = px.bar(graph_media2, x="res_media", y="bigramme", labels={"res_media":"Résiduel de Pearson","bigramme":"Bigramme (dans les posts Facebook <b>médias</b>)"}, orientation="h", title="Bigrammes les plus caractéristiques des posts Facebook <b>médias</b> {} en 2020".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="navy", opacity=0.75, textfont_size=12)
fig
In [52]:
graph_nonmedia2 = khi2_2.sort_values(by=["res_nonmedia"],ascending=False).head(50)
graph_nonmedia2
Out[52]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia res_media res_nonmedia
3594105 15577 4542299 420 20376 timeline photo 6863.368296 13932.631704 -77.775767 54.587899
3329168 29 9351 7 9610 signe horoscope 3173.928299 6443.071701 -56.213377 39.454064
2030558 31334 16052 37 8792 koh lanta 2913.862218 5915.137782 -53.294766 37.405600
2710373 63129 1854100 1029 10642 photo from 3851.816281 7819.183719 -45.483076 31.922868
78602 382 7128 16 5366 2020 savoir 1776.238131 3605.761869 -41.765802 29.313853
3263658 40 5668 3 5282 savoir journée 1744.224920 3540.775080 -41.692087 29.262114
218804 0 7505 0 4457 agathe auproux 1470.957516 2986.042484 -38.353064 26.918580
3328866 4 11729 2 3988 signe astrologique 1316.832059 2673.167941 -36.233066 25.430633
1205092 381 54093 11 3752 découvrir recette 1241.914546 2521.085454 -34.928670 24.515125
670484 2395 14719 25 3789 casa papel 1258.746234 2555.253766 -34.774168 24.406686
3329609 5 4243 1 3307 signe zodiaque 1091.749487 2216.250513 -33.011368 23.169443
78501 22 3161 1 3174 2020 réserve 1047.855085 2127.144915 -32.339698 22.698023
3201351 266 5206 6 3043 réserve surprise 1006.270914 2042.729086 -31.532629 22.131572
1203954 128 3149 33 2968 découvrir horoscope 990.429326 2010.570674 -30.422506 21.352418
3011581 165 41872 6 2834 recette facile 937.293997 1902.706003 -30.419277 21.350151
2906273 0 2501 0 2797 prédire astre 923.102574 1873.897426 -30.382603 21.324411
4014678 0 2511 0 2777 être capricorne 916.501912 1860.498088 -30.273783 21.248034
3795090 0 2511 0 2777 vierge scorpion 916.501912 1860.498088 -30.273783 21.248034
656523 0 2511 0 2777 capricorne vierge 916.501912 1860.498088 -30.273783 21.248034
1205374 9 6219 1 2728 découvrir signe 900.660323 1828.339677 -29.977682 21.040213
3277501 0 2417 0 2717 scorpion journée 896.699926 1820.300074 -29.944948 21.017238
402713 0 2268 0 2597 astre être 857.095954 1739.904046 -29.276201 20.547869
3250639 162 3810 1 2588 santé découvrir 854.455689 1734.544311 -29.196864 20.492185
78022 0 2200 0 2557 2020 prédire 843.894630 1713.105370 -29.049865 20.389012
282451 1 11722 1 2527 amour argent 834.323671 1693.676329 -28.850041 20.248764
366488 0 4494 0 2519 argent santé 831.353373 1687.646627 -28.833199 20.236943
1991943 0 2709 0 2464 journée amour 813.201552 1650.798448 -28.516689 20.014796
1774500 67 3728 16 2424 horoscope jour 805.280758 1634.719242 -27.813642 19.521354
2986908 0 42420 0 2295 raison adopter 757.425959 1537.574041 -27.521373 19.316221
2710619 2624 7832 6 2251 photo instagram 744.884701 1512.115299 -27.072736 19.001340
1986171 51 3531 4 2214 jour gratuit 732.013410 1485.986590 -26.907903 18.885650
2678669 490 8518 14 2242 perdre poids 744.554668 1511.445332 -26.773455 18.791286
2696497 0 213059 0 2116 pet alert 698.350034 1417.649966 -26.426313 18.547640
1202766 83 286464 2 2110 découvrir avenir 697.029902 1414.970098 -26.325570 18.476932
1027908 6479 40081 52 2316 céline dion 781.518375 1586.481625 -26.095561 18.315497
2176130 7811 73532 161 2796 mai 2020 975.907869 1981.092131 -26.085796 18.308644
1931659 0 2532 0 1970 inès loucif 650.165202 1319.834798 -25.498337 17.896328
2227198 17600 159930 281 3087 mars 2020 1111.551472 2256.448528 -24.911608 17.484525
2698384 28527 296576 96 2313 petit fille 795.049732 1613.950268 -24.791964 17.400551
2000125 11578 86109 179 2586 juin 2020 912.541515 1852.458485 -24.282779 17.043173
1948390 233 9905 2 1724 jade hallyday 569.637126 1156.362874 -23.783275 16.692590
283791 0 286064 0 1658 amoureux 3 547.194876 1110.805124 -23.392197 16.418107
451258 0 286031 0 1649 avenir amoureux 544.224578 1104.775422 -23.328621 16.373486
104476 0 286130 0 1640 3 cliquer 541.254280 1098.745720 -23.264872 16.328743
3459583 436 25879 2 1635 succulent recette 540.264181 1096.735819 -23.157538 16.253409
857431 6233 72652 114 2128 compte instagram 739.934205 1502.065795 -23.010822 16.150434
2043436 2918 27488 9 1616 laeticia hallyday 536.303783 1088.696217 -22.769603 15.981132
1601404 4105 52037 170 2293 février 2020 812.871519 1650.128481 -22.548270 15.825787
3522972 5064 1545219 69 1850 série tv 633.333514 1285.666486 -22.424337 15.738802
1998626 14160 50831 207 2419 juillet 2020 866.666914 1759.333086 -22.407768 15.727173
In [53]:
fig = px.bar(graph_nonmedia2, x="res_nonmedia", y="bigramme", labels={"res_nonmedia":"Résiduel de Pearson","bigramme":"Bigramme (dans les posts Facebook <b>non-médias</b>)"}, orientation="h", title="Bigrammes les plus caractéristiques des posts Facebook <b>non-médias</b> {} en 2020".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="aqua", opacity=0.75, textfont_size=12)
fig
In [54]:
khi2_2["exp_inter_media"] = ((khi2_2.interactions_media + khi2_2.interactions_nonmedia) * khi2_2.interactions_media.sum()) / (khi2_2.interactions_media.sum() + khi2_2.interactions_nonmedia.sum())
khi2_2["exp_inter_nonmedia"] = ((khi2_2.interactions_media + khi2_2.interactions_nonmedia) * khi2_2.interactions_nonmedia.sum()) / (khi2_2.interactions_media.sum() + khi2_2.interactions_nonmedia.sum())
khi2_2
Out[54]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia
0 0 5 0 1 #111 disposition 0.330033 0.669967 -0.574485 0.403210 1.328285 3.671715
1 179 0 1 0 #12h45rts chine 0.330033 0.669967 1.166204 -0.818515 47.552609 131.447391
2 5 0 1 0 #19h30rt crèches 0.330033 0.669967 1.166204 -0.818515 1.328285 3.671715
3 10 0 1 0 #19h30rt rtsinfo 0.330033 0.669967 1.166204 -0.818515 2.656570 7.343430
4 0 1 0 1 #6 ski 0.330033 0.669967 -0.574485 0.403210 0.265657 0.734343
... ... ... ... ... ... ... ... ... ... ... ...
4023857 0 43 0 1 samedi cavelescretet 0.330033 0.669967 -0.574485 0.403210 11.423253 31.576747
4023858 0 30 0 3 sea dahu 0.990099 2.009901 -0.995037 0.698379 7.969711 22.030289
4023859 0 0 0 1 sen ira 0.330033 0.669967 -0.574485 0.403210 0.000000 0.000000
4023860 0 0 0 1 sonore des 0.330033 0.669967 -0.574485 0.403210 0.000000 0.000000
4023861 0 0 0 2 spontanes cie 0.660066 1.339934 -0.812445 0.570224 0.000000 0.000000

4023862 rows × 11 columns

In [55]:
khi2_2["res_inter_media"] = (khi2_2.interactions_media - khi2_2.exp_inter_media) / np.sqrt(khi2_2.exp_inter_media)
khi2_2["res_inter_nonmedia"] = (khi2_2.interactions_nonmedia - khi2_2.exp_inter_nonmedia) / np.sqrt(khi2_2.exp_inter_nonmedia)
khi2_2
Out[55]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
0 0 5 0 1 #111 disposition 0.330033 0.669967 -0.574485 0.403210 1.328285 3.671715 -1.152513 0.693197
1 179 0 1 0 #12h45rts chine 0.330033 0.669967 1.166204 -0.818515 47.552609 131.447391 19.061839 -11.465051
2 5 0 1 0 #19h30rt crèches 0.330033 0.669967 1.166204 -0.818515 1.328285 3.671715 3.185835 -1.916172
3 10 0 1 0 #19h30rt rtsinfo 0.330033 0.669967 1.166204 -0.818515 2.656570 7.343430 4.505451 -2.709876
4 0 1 0 1 #6 ski 0.330033 0.669967 -0.574485 0.403210 0.265657 0.734343 -0.515419 0.310007
... ... ... ... ... ... ... ... ... ... ... ... ... ...
4023857 0 43 0 1 samedi cavelescretet 0.330033 0.669967 -0.574485 0.403210 11.423253 31.576747 -3.379830 2.032853
4023858 0 30 0 3 sea dahu 0.990099 2.009901 -0.995037 0.698379 7.969711 22.030289 -2.823068 1.697980
4023859 0 0 0 1 sen ira 0.330033 0.669967 -0.574485 0.403210 0.000000 0.000000 NaN NaN
4023860 0 0 0 1 sonore des 0.330033 0.669967 -0.574485 0.403210 0.000000 0.000000 NaN NaN
4023861 0 0 0 2 spontanes cie 0.660066 1.339934 -0.812445 0.570224 0.000000 0.000000 NaN NaN

4023862 rows × 13 columns

In [56]:
graph_interactions_media2 = khi2_2.sort_values(by=["res_inter_media"],ascending=False).head(50)
graph_interactions_media2
Out[56]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
900227 814330 271088 4713 3257 conseil fédéral 2630.363787 5339.636213 40.607430 -28.500835 288348.928578 797069.071422 979.514669 -589.144919
949395 392132 6995 3530 133 coronavirus suisse 1208.911236 2454.088764 66.756651 -46.853994 106030.895762 293096.104238 878.624519 -528.462909
2480943 386653 20836 1613 265 nouveau cas 619.802157 1258.197843 39.894151 -28.000211 108252.319896 299236.680104 846.158430 -508.935655
3471275 243252 6291 3765 124 suisse monde 1283.498716 2605.501284 69.265470 -48.614840 66292.853706 183250.146294 687.289418 -413.381322
3368363 373285 114310 777 446 soin intensif 403.630478 819.369522 18.584329 -13.043645 129533.042413 358061.957587 677.263747 -407.351220
3705513 290967 62746 301 280 universitaire genève 191.749230 389.250770 7.889649 -5.537449 93966.347134 259746.652866 642.660331 -386.538436
2482377 276778 53705 919 505 nouveau mesure 469.967131 954.032869 20.713066 -14.537725 87795.134191 242687.865809 637.803954 -383.617490
95344 275062 60164 1282 423 24 heure 562.706431 1142.293569 30.322516 -21.282239 89055.145512 246170.854488 623.303319 -374.895849
3790911 189631 1412 24 58 vie coronavirus 27.062714 54.937286 -0.588736 0.413212 50751.917107 140291.082893 616.468108 -370.784701
4010276 185290 408 2250 20 évolution coronavirus 749.175131 1520.824869 54.832537 -38.484905 49331.980250 136366.019750 612.125619 -368.172840
1795731 313013 105235 472 351 hôpital universitaire 271.617239 551.382761 12.158547 -8.533629 111110.523945 307137.476055 605.709029 -364.313479
3478120 181028 5939 1857 294 suivre direct 709.901193 1441.098807 43.052847 -30.217181 49669.099029 137297.900971 589.408460 -354.509238
2312231 225975 44929 350 199 mise point 181.188171 367.811829 12.541167 -8.802176 71967.553650 198936.446350 574.081233 -345.290430
3507473 167550 4267 71 115 symptôme covid-19 61.386156 124.613844 1.227049 -0.861220 45644.394935 126172.605065 570.597609 -343.195148
191435 159582 930 2 45 actuel maladie 15.511556 31.488444 -3.430660 2.407852 42641.142144 117870.857856 566.306910 -340.614438
1185184 202352 30387 328 224 début pandémie 182.178270 369.821730 10.803736 -7.582738 61828.752875 170910.247125 565.135948 -339.910144
3342541 159429 1719 1786 14 site app 594.059576 1205.940424 48.903456 -34.323504 42810.100019 118337.899981 563.632230 -339.005709
2597559 156361 0 9 0 pandémie revoir 2.970298 6.029702 3.498613 -2.455545 41538.399788 114822.600212 563.381389 -338.854836
1244397 155893 0 1 0 dérouter scientifique 0.330033 0.669967 1.166204 -0.818515 41414.072295 114478.927705 562.537637 -338.347348
980252 155893 0 1 0 covid-19 dérouter 0.330033 0.669967 1.166204 -0.818515 41414.072295 114478.927705 562.537637 -338.347348
112220 155893 0 1 0 369 ici 0.330033 0.669967 1.166204 -0.818515 41414.072295 114478.927705 562.537637 -338.347348
3109439 155893 0 1 0 revoir 369 0.330033 0.669967 1.166204 -0.818515 41414.072295 114478.927705 562.537637 -338.347348
1799839 155899 4 2 1 ici connaissance 0.990099 2.009901 1.014938 -0.712347 41416.728866 114486.271134 562.536024 -338.346378
3274708 155893 545 1 3 scientifique début 1.320132 2.679868 -0.278626 0.195557 41558.855380 114879.144620 560.846686 -337.330298
1133480 155914 821 2 1 diversité symptôme 0.990099 2.009901 1.014938 -0.712347 41637.755520 115097.244480 560.031305 -336.839874
892390 160488 8818 6 11 connaissance actuel 5.610563 11.389437 0.164412 -0.115395 44977.330118 124328.669882 544.659730 -327.594392
668659 173415 18480 688 181 cas coronavirus 286.798762 582.201238 23.690467 -16.627452 50978.256901 140916.743099 542.274619 -326.159828
2785155 299944 137392 1231 1275 port masque 827.062942 1678.937058 14.045733 -9.858174 116181.385446 321154.614554 539.124273 -324.265002
2528375 223949 64841 1182 620 office fédéral 594.719642 1207.280358 24.081822 -16.902129 76719.095394 212070.904606 531.550210 -319.709459
241871 227955 70029 973 225 alain berset 395.379651 802.620349 29.049277 -20.388600 79161.546182 218822.453818 528.842915 -318.081113
338814 134019 37 1464 2 app évolution 483.828521 982.171479 44.561144 -31.275798 35612.919603 98443.080397 521.456571 -313.638477
1178822 198935 48050 52 113 durée vie 54.455461 110.544539 -0.332746 0.233542 65613.303008 181371.696992 520.481003 -313.051706
1112001 129332 1473 1325 33 direct site 448.184946 909.815054 41.417069 -29.069090 34749.268579 96055.731421 507.386629 -305.175883
1596500 190643 49291 743 395 fédéral santé 375.577665 762.422335 18.959011 -13.306620 63740.155248 176193.844752 502.648973 -302.326343
3094039 118892 666 1448 5 restezchezvous coronavirus 479.538091 973.461909 44.225321 -31.040097 31761.423896 87796.576104 488.900637 -294.057185
1148401 184281 52439 1924 1136 donald trump 1009.901278 2050.098722 28.764289 -20.188577 62886.333535 173833.666465 484.084825 -291.160637
3069767 110710 0 6 0 reportage soin 1.980199 4.019801 2.856605 -2.004944 29410.890443 81299.109557 474.058243 -285.129987
2485760 117688 8583 256 30 nouvo rts 94.389466 191.610534 16.634429 -11.675083 33544.779578 92726.220422 459.416574 -276.323519
1899296 115809 7567 108 35 intensif hôpital 47.194733 95.805267 8.851043 -6.212215 32775.702459 90600.297541 458.644597 -275.859201
3642103 170928 52664 964 697 transport public 548.184975 1112.815025 17.759740 -12.464897 59398.787968 164193.212032 457.614569 -275.239674
3467748 113283 7600 520 88 suisse compte 200.660123 407.339877 22.543566 -15.822485 32113.419469 88769.580531 452.949751 -272.433944
668678 137850 26442 644 244 cas covid-19 293.069391 594.930609 20.499158 -14.387591 43645.325740 120646.674260 450.924241 -271.215668
900128 213881 101114 1968 1461 conseil etat 1131.683491 2297.316509 24.860404 -17.448587 83680.638019 231314.361981 450.090348 -270.714110
236404 98912 0 7 0 air etc 2.310232 4.689768 3.085486 -2.165587 26276.668734 72635.331266 448.087438 -269.509427
3708054 98858 0 5 0 up durée 1.650165 3.349835 2.607712 -1.830255 26262.323254 72595.676746 447.965107 -269.435849
727587 105008 3793 36 12 check up 15.841589 32.158411 5.064737 -3.554750 28903.751161 79897.248839 447.643001 -269.242114
1231518 98385 0 16 0 démêle vrai 5.280530 10.719470 4.664817 -3.274060 26136.667476 72248.332524 446.892146 -268.790499
1745482 102067 2222 12 136 heure air 48.844898 99.155102 -5.271907 3.700154 27705.106616 76583.893384 446.756136 -268.708694
1327037 97148 0 6 0 entendre durée 1.980199 4.019801 2.856605 -2.004944 25808.049722 71339.950278 444.073859 -267.095395
1376023 97148 0 6 1 etc entendre 2.310232 4.689768 2.427568 -1.703819 25808.049722 71339.950278 444.073859 -267.095395
In [57]:
fig = px.bar(graph_interactions_media2, x="res_inter_media", y="bigramme", labels={"res_inter_media":"Résiduel de Pearson (<b>en fonction des interactions</b>)","bigramme":"Bigramme (dans les posts Facebook <b>médias</b>)"}, orientation="h", title="Bigrammes les plus caractéristiques des posts Facebook <b>médias</b> {} en 2020<br>(en fonction des interactions suscitées)".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="blue", opacity=0.75, textfont_size=12)
fig
In [58]:
graph_interactions_nonmedia2 = khi2_2.sort_values(by=["res_inter_nonmedia"],ascending=False).head(50)
graph_interactions_nonmedia2
Out[58]:
interactions_media interactions_nonmedia obs_media obs_nonmedia bigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
3594105 15577 4542299 420 20376 timeline photo 6863.368296 13932.631704 -77.775767 54.587899 1.210832e+06 3.347044e+06 -1086.221995 653.325764
3522972 5064 1545219 69 1850 série tv 633.333514 1285.666486 -22.424337 15.738802 4.118436e+05 1.138439e+06 -633.859490 381.245029
2710373 63129 1854100 1029 10642 photo from 3851.816281 7819.183719 -45.483076 31.922868 5.093254e+05 1.407904e+06 -625.213547 376.044787
3414200 0 947087 0 955 souviens série 315.181608 639.818392 -17.753355 12.460415 2.516003e+05 6.954867e+05 -501.597772 301.694082
321787 2562 961053 85 1757 année 80 607.920966 1234.079034 -21.208624 14.885539 2.559911e+05 7.076239e+05 -500.891950 301.269554
791186 0 725245 0 774 clique image 255.445617 518.554383 -15.982666 11.217635 1.926664e+05 5.325786e+05 -438.937851 264.006260
1076444 3199 672725 29 1028 dessin animer 348.844984 708.155016 -17.124713 12.019195 1.795640e+05 4.963600e+05 -416.200622 250.330587
1975114 953 647673 21 1066 johnny hallyday 358.745977 728.254023 -17.831861 12.515515 1.723121e+05 4.763139e+05 -412.809074 248.290685
1076445 932 580058 22 1020 dessin animé 343.894488 698.105512 -17.358050 12.182965 1.543441e+05 4.266459e+05 -390.494186 234.869035
3674933 0 536864 0 436 tv alffr 143.894431 292.105569 -11.995600 8.419263 1.426217e+05 3.942423e+05 -377.652882 227.145426
3473957 108 536889 12 312 suisse switzerland 106.930724 217.069276 -9.180272 6.443289 1.426570e+05 3.943400e+05 -377.413716 227.001576
1249668 0 526121 0 446 détail série 147.194761 298.805239 -12.132385 8.515267 1.397677e+05 3.863533e+05 -373.855247 224.861277
259576 6990 548719 71 926 aller loin 329.042998 667.957002 -14.225441 9.984305 1.476280e+05 4.080810e+05 -366.031383 220.155488
3475246 0 483210 0 360 suisseromande romandie 118.811915 241.188085 -10.900088 7.650364 1.283681e+05 3.548419e+05 -358.284993 215.496296
1818865 0 454484 0 412 image regarde 135.973636 276.026364 -11.660773 8.184260 1.207369e+05 3.337471e+05 -347.472117 208.992717
249851 0 447036 0 686 alffr propose 226.402705 459.597295 -15.046684 10.560704 1.187583e+05 3.282777e+05 -344.613202 207.273176
2346415 0 439458 0 132 montagne mountain 43.564369 88.435631 -6.600331 4.632525 1.167451e+05 3.227129e+05 -341.679835 205.508856
3522433 10 425455 2 358 série mythique 118.811915 241.188085 -10.716603 7.521582 1.130278e+05 3.124372e+05 -336.166286 202.192642
1713036 0 399693 0 380 générique détail 125.412577 254.587423 -11.198776 7.860001 1.061813e+05 2.935117e+05 -325.854657 195.990547
1205541 628 392291 11 343 découvrir série 116.831717 237.168283 -9.791190 6.872069 1.043817e+05 2.885373e+05 -321.137782 193.153506
3272456 65 381425 3 239 schweiz svizzera 79.868010 162.131990 -8.601203 6.036862 1.013455e+05 2.801445e+05 -318.143905 191.352790
3594106 141 370029 5 322 timeline photos 107.920823 219.079177 -9.907193 6.953488 9.833826e+04 2.718317e+05 -313.139693 188.342926
249830 0 358995 0 841 alffr découvrir 277.557835 563.442165 -16.660067 11.693077 9.536955e+04 2.636255e+05 -308.819604 185.744539
3668066 0 344915 0 92 trust nature 30.363045 61.636955 -5.510267 3.867450 9.162910e+04 2.532859e+05 -302.702984 182.065599
3342498 0 335517 0 550 site alffr 181.518204 368.481796 -13.472869 9.456102 8.913245e+04 2.463845e+05 -298.550585 179.568072
3499876 0 331174 0 173 svizzera schweiz 57.095726 115.904274 -7.556171 5.303394 8.797870e+04 2.431953e+05 -296.612042 178.402103
3414159 0 330700 0 519 souviens dessin 171.287178 347.712822 -13.087673 9.185747 8.785278e+04 2.428472e+05 -296.399699 178.274387
504251 0 315819 0 4 beauté vie 1.320132 2.679868 -1.148970 0.806419 8.389954e+04 2.319195e+05 -289.654172 174.217180
2041590 1184 316529 27 166 lac montagne 63.696388 129.303612 -4.597968 3.227141 8.440269e+04 2.333103e+05 -286.445986 172.287565
3501474 0 304132 0 191 swiss montagne 63.036322 127.963678 -7.939542 5.572467 8.079481e+04 2.233372e+05 -284.244271 170.963308
2061713 0 304068 0 191 lausanne mylausanne 63.036322 127.963678 -7.939542 5.572467 8.077780e+04 2.232902e+05 -284.214362 170.945319
1808822 0 304065 0 190 igerslausanne vaud 62.706289 127.293711 -7.918730 5.557860 8.077701e+04 2.232880e+05 -284.212960 170.944475
2063548 0 304065 0 190 lausanneparcoeur lausanne 62.706289 127.293711 -7.918730 5.557860 8.077701e+04 2.232880e+05 -284.212960 170.944475
1808912 0 304065 0 190 iglausann igerslausanne 62.706289 127.293711 -7.918730 5.557860 8.077701e+04 2.232880e+05 -284.212960 170.944475
2346292 0 304065 0 190 montagne lacleman 62.706289 127.293711 -7.918730 5.557860 8.077701e+04 2.232880e+05 -284.212960 170.944475
2042499 0 304065 0 190 lacleman leman 62.706289 127.293711 -7.918730 5.557860 8.077701e+04 2.232880e+05 -284.212960 170.944475
3504770 0 304065 0 190 switzerland suisseromande 62.706289 127.293711 -7.918730 5.557860 8.077701e+04 2.232880e+05 -284.212960 170.944475
3136624 0 304065 0 190 romandie switzerlandpicture 62.706289 127.293711 -7.918730 5.557860 8.077701e+04 2.232880e+05 -284.212960 170.944475
2396895 0 304065 0 190 mylausanne iglausann 62.706289 127.293711 -7.918730 5.557860 8.077701e+04 2.232880e+05 -284.212960 170.944475
3745749 7 304065 1 190 vaud cantondevaud 63.036322 127.963678 -7.813590 5.484066 8.077887e+04 2.232931e+05 -284.191602 170.931630
652739 0 301932 0 189 cantondevaud suisse 62.376255 126.623745 -7.897864 5.543215 8.021036e+04 2.217216e+05 -283.214336 170.343837
508103 378 298771 22 283 bel soirée 100.660095 204.339905 -7.840176 5.502726 7.947104e+04 2.196780e+05 -280.565206 168.750475
3032687 0 288145 0 204 regarde générique 67.326752 136.673248 -8.205288 5.758984 7.654775e+04 2.115973e+05 -276.672634 166.409225
104476 0 286130 0 1640 3 cliquer 541.254280 1098.745720 -23.264872 16.328743 7.601245e+04 2.101176e+05 -275.703550 165.826354
283791 0 286064 0 1658 amoureux 3 547.194876 1110.805124 -23.392197 16.418107 7.599491e+04 2.100691e+05 -275.671751 165.807227
451258 0 286031 0 1649 avenir amoureux 544.224578 1104.775422 -23.328621 16.373486 7.598615e+04 2.100449e+05 -275.655850 165.797663
1202766 83 286464 2 2110 découvrir avenir 697.029902 1414.970098 -26.325570 18.476932 7.612323e+04 2.104238e+05 -275.603551 165.766207
2398263 0 285180 0 152 myswitzerland swiss 50.165031 101.834969 -7.082728 4.971101 7.576007e+04 2.094199e+05 -275.245479 165.550839
3504974 0 285180 0 152 switzerlandpicture svizzera 50.165031 101.834969 -7.082728 4.971101 7.576007e+04 2.094199e+05 -275.245479 165.550839
3272360 0 285180 0 152 schweiz myswitzerland 50.165031 101.834969 -7.082728 4.971101 7.576007e+04 2.094199e+05 -275.245479 165.550839
In [59]:
fig = px.bar(graph_interactions_nonmedia2, x="res_inter_nonmedia", y="bigramme", labels={"res_inter_nonmedia":"Résiduel de Pearson (<b>en fonction des interactions</b>)","bigramme":"Bigramme (dans les posts Facebook <b>non-médias</b>)"}, orientation="h", title="Bigrammes les plus caractéristiques des posts Facebook <b>non-médias</b> {} en 2020<br>(en fonction des interactions suscitées)".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="cyan", opacity=0.75)
fig

trigrammes

In [60]:
media3 = pan.read_csv("suisse-trigrammes-media-nettoye.csv", low_memory=False, names=["trigramme","interactions"])
nonmedia3 = pan.read_csv("suisse-trigrammes-nonmedia-nettoye.csv", low_memory=False, names=["trigramme","interactions"])
In [61]:
media3
Out[61]:
trigramme interactions
0 donner enfant envie 49596
1 enfant envie intéresser 49596
2 envie intéresser biodiversité 49596
3 intéresser biodiversité falloir 49596
4 biodiversité falloir emmener 49596
... ... ...
4091708 rtn complétez formulaire 0
4091709 complétez formulaire ici 0
4091710 formulaire ici jardinière 0
4091711 ici jardinière recruter 0
4091712 jardinière recruter rtn 0

4091713 rows × 2 columns

In [62]:
nonmedia3
Out[62]:
trigramme interactions
0 falloir il faire 107933
1 il faire petit 107933
2 faire petit prince 107933
3 petit prince falloir 107933
4 prince falloir patient 107933
... ... ...
8334960 competenze indipendentemente dal 0
8334961 indipendentemente dal tuo 0
8334962 dal tuo background 0
8334963 tuo background maggiori 0
8334964 background maggiori informazioni 0

8334965 rows × 2 columns

In [63]:
media3_table = pan.pivot_table(media3,index=["trigramme"],values=["trigramme","interactions"],aggfunc=[len,np.sum])
nonmedia3_table = pan.pivot_table(nonmedia3,index=["trigramme"],values=["trigramme","interactions"],aggfunc=[len,np.sum])
In [64]:
media3_table
Out[64]:
len sum
interactions interactions
trigramme
#12h45rts chine cloître 1 179
#19h30rt crèches bernois 1 5
#19h30rt rtsinfo période 1 10
#coronaviru déclarer noir 1 325
#covid19 autorité soutenir 1 199
... ... ...
수원 이동국 덕분에챌린지 1 8
시즌 k리그 리드를 1 8
이동국 덕분에챌린지 thefansgame 1 8
이동국의 시즌 k리그 1 8
전북 수원 이동국 1 8

2607587 rows × 2 columns

In [65]:
nonmedia3_table
Out[65]:
len sum
interactions interactions
trigramme
#111 disposition signaler 1 5
#6 ski orientation 1 1
#allezlausanne lswil dasilva 1 105
#allezxamax mafamille xama 1 48
#allezxamax pournoscouleur match 1 65
... ... ...
samedi cavelescretet chateauleveque 1 43
sea dahu solo 3 30
sen ira collectif 1 0
sonore des femmes 1 0
spontanes cie sevenacts 2 0

4343766 rows × 2 columns

In [66]:
media3_table["type"] = "media"
nonmedia3_table["type"] = "non-media"
In [67]:
media3_table
Out[67]:
len sum type
interactions interactions
trigramme
#12h45rts chine cloître 1 179 media
#19h30rt crèches bernois 1 5 media
#19h30rt rtsinfo période 1 10 media
#coronaviru déclarer noir 1 325 media
#covid19 autorité soutenir 1 199 media
... ... ... ...
수원 이동국 덕분에챌린지 1 8 media
시즌 k리그 리드를 1 8 media
이동국 덕분에챌린지 thefansgame 1 8 media
이동국의 시즌 k리그 1 8 media
전북 수원 이동국 1 8 media

2607587 rows × 3 columns

In [68]:
nonmedia3_table
Out[68]:
len sum type
interactions interactions
trigramme
#111 disposition signaler 1 5 non-media
#6 ski orientation 1 1 non-media
#allezlausanne lswil dasilva 1 105 non-media
#allezxamax mafamille xama 1 48 non-media
#allezxamax pournoscouleur match 1 65 non-media
... ... ... ...
samedi cavelescretet chateauleveque 1 43 non-media
sea dahu solo 3 30 non-media
sen ira collectif 1 0 non-media
sonore des femmes 1 0 non-media
spontanes cie sevenacts 2 0 non-media

4343766 rows × 3 columns

In [69]:
tableau3 = pan.concat([media3_table,nonmedia3_table], names = ["trigramme","nb","interactions","type"], levels=0)
In [70]:
tableau3
Out[70]:
len sum type
interactions interactions
trigramme
#12h45rts chine cloître 1 179 media
#19h30rt crèches bernois 1 5 media
#19h30rt rtsinfo période 1 10 media
#coronaviru déclarer noir 1 325 media
#covid19 autorité soutenir 1 199 media
... ... ... ...
samedi cavelescretet chateauleveque 1 43 non-media
sea dahu solo 3 30 non-media
sen ira collectif 1 0 non-media
sonore des femmes 1 0 non-media
spontanes cie sevenacts 2 0 non-media

6951353 rows × 3 columns

In [71]:
tableau3["trigramme"] = tableau3.index
tableau3.columns = ["nb","interactions","media","trigramme"]
tableau3.reset_index(drop=True, inplace=True)
tableau3
Out[71]:
nb interactions media trigramme
0 1 179 media #12h45rts chine cloître
1 1 5 media #19h30rt crèches bernois
2 1 10 media #19h30rt rtsinfo période
3 1 325 media #coronaviru déclarer noir
4 1 199 media #covid19 autorité soutenir
... ... ... ... ...
6951348 1 43 non-media samedi cavelescretet chateauleveque
6951349 3 30 non-media sea dahu solo
6951350 1 0 non-media sen ira collectif
6951351 1 0 non-media sonore des femmes
6951352 2 0 non-media spontanes cie sevenacts

6951353 rows × 4 columns

In [72]:
khi2_3 = pan.pivot_table(tableau3,index=["trigramme"],columns=["media"],values=["trigramme","interactions"],aggfunc=[len,np.sum],fill_value=0)
In [73]:
khi2_3
Out[73]:
len sum
interactions nb interactions nb
media media non-media media non-media media non-media media non-media
trigramme
#111 disposition signaler 0 1 0 1 0 5 0 1
#12h45rts chine cloître 1 0 1 0 179 0 1 0
#19h30rt crèches bernois 1 0 1 0 5 0 1 0
#19h30rt rtsinfo période 1 0 1 0 10 0 1 0
#6 ski orientation 0 1 0 1 0 1 0 1
... ... ... ... ... ... ... ... ...
samedi cavelescretet chateauleveque 0 1 0 1 0 43 0 1
sea dahu solo 0 1 0 1 0 30 0 3
sen ira collectif 0 1 0 1 0 0 0 1
sonore des femmes 0 1 0 1 0 0 0 1
spontanes cie sevenacts 0 1 0 1 0 0 0 2

6749558 rows × 8 columns

In [74]:
khi2_3["trigramme"] = khi2_3.index
khi2_3.columns = khi2_3.columns.get_level_values(0)
khi2_3.reset_index(drop=True, inplace=True)
khi2_3.columns = ["a","b","c","d","interactions_media","interactions_nonmedia","obs_media","obs_nonmedia","trigramme"]
khi2_3 = khi2_3.drop(columns=["a","b","c","d"])
khi2_3
Out[74]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme
0 0 5 0 1 #111 disposition signaler
1 179 0 1 0 #12h45rts chine cloître
2 5 0 1 0 #19h30rt crèches bernois
3 10 0 1 0 #19h30rt rtsinfo période
4 0 1 0 1 #6 ski orientation
... ... ... ... ... ...
6749553 0 43 0 1 samedi cavelescretet chateauleveque
6749554 0 30 0 3 sea dahu solo
6749555 0 0 0 1 sen ira collectif
6749556 0 0 0 1 sonore des femmes
6749557 0 0 0 2 spontanes cie sevenacts

6749558 rows × 5 columns

In [75]:
khi2_3.obs_media.sum(), khi2_3.obs_nonmedia.sum()
Out[75]:
(4091713, 8334965)
In [76]:
print("Nb de lignes fichier media3 = ", media3.shape[0])
print("Somme observée média khi2_3 = ", khi2_3.obs_media.sum())

print("Nb de lignes  fichier nonmedia3 = ", nonmedia3.shape[0])
print("Somme observée nonmédia khi2_3 = ", khi2_3.obs_nonmedia.sum())
Nb de lignes fichier media3 =  4091713
Somme observée média khi2_3 =  4091713
Nb de lignes  fichier nonmedia3 =  8334965
Somme observée nonmédia khi2_3 =  8334965
In [77]:
khi2_3["exp_media"] = ((khi2_3.obs_media + khi2_3.obs_nonmedia) * khi2_3.obs_media.sum()) / (khi2_3.obs_media.sum() + khi2_3.obs_nonmedia.sum())
khi2_3["exp_nonmedia"] = ((khi2_3.obs_media + khi2_3.obs_nonmedia) * khi2_3.obs_nonmedia.sum()) / (khi2_3.obs_media.sum() + khi2_3.obs_nonmedia.sum())
khi2_3
Out[77]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia
0 0 5 0 1 #111 disposition signaler 0.329268 0.670732
1 179 0 1 0 #12h45rts chine cloître 0.329268 0.670732
2 5 0 1 0 #19h30rt crèches bernois 0.329268 0.670732
3 10 0 1 0 #19h30rt rtsinfo période 0.329268 0.670732
4 0 1 0 1 #6 ski orientation 0.329268 0.670732
... ... ... ... ... ... ... ...
6749553 0 43 0 1 samedi cavelescretet chateauleveque 0.329268 0.670732
6749554 0 30 0 3 sea dahu solo 0.987805 2.012195
6749555 0 0 0 1 sen ira collectif 0.329268 0.670732
6749556 0 0 0 1 sonore des femmes 0.329268 0.670732
6749557 0 0 0 2 spontanes cie sevenacts 0.658537 1.341463

6749558 rows × 7 columns

In [78]:
khi2_3["res_media"] = (khi2_3.obs_media - khi2_3.exp_media) / np.sqrt(khi2_3.exp_media)
khi2_3["res_nonmedia"] = (khi2_3.obs_nonmedia - khi2_3.exp_nonmedia) / np.sqrt(khi2_3.exp_nonmedia)
khi2_3
Out[78]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia res_media res_nonmedia
0 0 5 0 1 #111 disposition signaler 0.329268 0.670732 -0.573819 0.402046
1 179 0 1 0 #12h45rts chine cloître 0.329268 0.670732 1.168890 -0.818982
2 5 0 1 0 #19h30rt crèches bernois 0.329268 0.670732 1.168890 -0.818982
3 10 0 1 0 #19h30rt rtsinfo période 0.329268 0.670732 1.168890 -0.818982
4 0 1 0 1 #6 ski orientation 0.329268 0.670732 -0.573819 0.402046
... ... ... ... ... ... ... ... ... ...
6749553 0 43 0 1 samedi cavelescretet chateauleveque 0.329268 0.670732 -0.573819 0.402046
6749554 0 30 0 3 sea dahu solo 0.987805 2.012195 -0.993884 0.696364
6749555 0 0 0 1 sen ira collectif 0.329268 0.670732 -0.573819 0.402046
6749556 0 0 0 1 sonore des femmes 0.329268 0.670732 -0.573819 0.402046
6749557 0 0 0 2 spontanes cie sevenacts 0.658537 1.341463 -0.811503 0.568579

6749558 rows × 9 columns

In [79]:
graph_media3 = khi2_3.sort_values(by=["res_media"],ascending=False).head(50)
graph_media3
Out[79]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia res_media res_nonmedia
1568440 206573 272 2597 7 coronavirus suisse monde 857.415043 1746.584957 59.408706 -41.624668
6724272 165207 198 2086 4 évolution coronavirus suisse 688.171060 1401.828940 53.285105 -37.334171
5597189 134019 37 1464 2 site app évolution 482.707547 983.292453 44.663877 -31.293714
5849788 126391 38 1301 3 suivre direct site 429.366058 874.633942 42.064940 -29.472771
1849265 126366 37 1298 2 direct site app 428.048985 871.951015 42.048263 -29.461086
576934 125325 37 1289 2 app évolution coronavirus 425.085569 865.914431 41.901787 -29.358458
5825240 18561 0 654 0 suisse monde coronavirus 215.341566 438.658434 29.892521 -20.944174
1378666 20335 0 536 0 commune montreux mymontreuxch 176.487889 359.512111 27.061762 -18.960805
4072431 20335 0 536 0 news commune montreux 176.487889 359.512111 27.061762 -18.960805
1631147 16517 0 466 0 covid-19 suisse monde 153.439098 312.560902 25.232869 -17.679392
6724299 12835 0 422 0 évolution covid-19 suisse 138.951286 283.048714 24.012087 -16.824052
3462846 38444 181 425 6 lier coronavirus suisse 141.914702 289.085298 23.763129 -16.649619
5203865 7420 0 373 0 retrouver ici principal 122.817132 250.182868 22.575017 -15.817170
2481740 6473 0 353 0 fin direct retrouver 116.231763 236.768237 21.961451 -15.387275
1849030 7033 0 348 0 direct retrouver ici 114.585420 233.414580 21.805362 -15.277911
2986724 12655 0 341 0 ici info principal 112.280541 228.719459 21.584941 -15.123474
5203815 11724 65 336 2 retrouver ici info 111.292736 226.707264 21.300201 -14.923971
323173 26922 5 332 1 actualité lier coronavirus 109.646394 223.353606 21.234743 -14.878108
3692880 39547 9 319 2 mars suivre direct 105.695172 215.304828 20.747850 -14.536967
2963138 7518 355 317 3 humour jérémy vaillot 105.365904 214.634096 20.617480 -14.445623
5010542 6754 498 349 22 radio lac matin 122.158595 248.841405 20.523920 -14.380070
2786134 1278 0 306 0 grand étape pandémie 100.756146 205.243854 20.447226 -14.326334
5825339 22695 5 303 1 suisse monde site 100.097609 203.902391 20.280344 -14.209409
3897037 22124 5 299 1 monde site app 98.780535 201.219465 20.145155 -14.114689
3279384 2717 0 294 0 jour choix midi 96.804924 197.195076 20.042290 -14.042616
3761943 2717 4 294 1 menu jour choix 97.134193 197.865807 19.974882 -13.995387
4751287 4852 4 276 1 principal info jour 91.207361 185.792639 19.349499 -13.557213
2522551 1424 0 269 0 fois jour point 88.573213 180.426787 19.171223 -13.432304
3287772 1424 0 269 0 jour point situation 88.573213 180.426787 19.171223 -13.432304
4211409 190527 48776 738 389 office fédéral santé 371.085543 755.914457 19.047054 -13.345305
3889057 4281 0 263 0 monde coronavirus nouveau 86.597602 176.402398 18.956212 -13.281656
778176 28216 28 262 1 avril suivre direct 86.597602 176.402398 18.848752 -13.206365
3412481 13166 30193 548 218 lausanne hockey club 252.219632 513.780368 18.624297 -13.049101
2988776 4378 0 252 0 ici principal info 82.975649 169.024351 18.555555 -13.000937
2650247 185113 48437 709 380 fédéral santé public 358.573342 730.426658 18.505812 -12.966084
3292806 4219 0 249 0 jour évolution covid-19 81.987844 167.012156 18.444774 -12.923318
3270642 2249 4 248 1 jouer ici concours 81.987844 167.012156 18.334335 -12.845939
1096789 68249 1407 268 12 cas 24 heure 92.195166 187.804834 18.309511 -12.828546
3086667 4224 0 243 0 info jour évolution 80.012233 162.987767 18.221193 -12.766666
4120436 65884 1360 258 10 nouveau cas 24 88.243945 179.756055 18.071026 -12.661452
4619272 1193 0 234 0 point situation travers 77.048817 156.951183 17.880580 -12.528016
5608033 1193 0 234 0 situation travers carte 77.048817 156.951183 17.880580 -12.528016
5010507 4275 18 215 3 radio lac genève 71.780522 146.219478 16.904362 -11.844029
3605996 15944 63 211 1 mai suivre direct 69.804911 142.195089 16.899607 -11.840698
6323170 5586 354 208 1 version ligne ici 68.817106 140.182894 16.777902 -11.755425
1245579 1314 0 206 0 chronique air temps 67.829301 138.170699 16.776729 -11.754603
2713878 3980 18 209 3 genève 918 lausanne 69.804911 142.195089 16.660227 -11.672977
3378019 3980 18 209 3 lac genève 918 69.804911 142.195089 16.660227 -11.672977
248927 3980 18 209 3 918 lausanne 956 69.804911 142.195089 16.660227 -11.672977
5939571 2078 4 200 1 tarder jouer ici 66.182958 134.817042 16.448956 -11.524949
In [80]:
fig = px.bar(graph_media3, x="res_media", y="trigramme", labels={"res_media":"Résiduel de Pearson","trigramme":"Trigramme (dans les posts Facebook <b>médias</b>)"}, orientation="h", title="Trigrammes les plus caractéristiques des posts Facebook <b>médias</b> {} en 2020".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="DarkOliveGreen", opacity=0.75, textfont_size=12)
fig
In [81]:
graph_nonmedia3 = khi2_3.sort_values(by=["res_nonmedia"],ascending=False).head(50)
graph_nonmedia3
Out[81]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia res_media res_nonmedia
138662 0 5615 0 5280 2020 savoir journée 1738.537414 3541.462586 -41.695772 29.214114
138273 0 2604 0 2857 2020 réserve surprise 940.719961 1916.280039 -30.671158 21.489726
6732580 0 2511 0 2777 être capricorne vierge 914.378485 1862.621515 -30.238692 21.186718
1080300 0 2511 0 2777 capricorne vierge scorpion 914.378485 1862.621515 -30.238692 21.186718
6367242 0 2417 0 2717 vierge scorpion journée 894.622378 1822.377622 -29.910239 20.956588
678679 0 2268 0 2597 astre être capricorne 855.110164 1741.889836 -29.242267 20.488574
4880452 0 2268 0 2597 prédire astre être 855.110164 1741.889836 -29.242267 20.488574
136895 0 2200 0 2557 2020 prédire astre 841.939426 1715.060574 -29.016192 20.330175
465606 0 2159 0 2517 amour argent santé 828.768688 1688.231312 -28.788343 20.170532
5444549 0 2159 0 2517 santé découvrir horoscope 828.768688 1688.231312 -28.788343 20.170532
620195 0 2159 0 2517 argent santé découvrir 828.768688 1688.231312 -28.788343 20.170532
5468058 0 2095 0 2440 savoir journée amour 803.415017 1636.584983 -28.344577 19.859608
3297834 0 2079 0 2420 journée amour argent 796.829648 1623.170352 -28.228171 19.778048
2945607 0 3458 0 2192 horoscope jour gratuit 721.756442 1470.243558 -26.865525 18.823311
2007844 0 1057 0 1758 découvrir signe horoscope 578.853935 1179.146065 -24.059384 16.857190
755816 0 285940 0 1635 avenir amoureux 3 538.353915 1096.646085 -23.202455 16.256784
467945 0 285940 0 1635 amoureux 3 cliquer 538.353915 1096.646085 -23.202455 16.256784
1998481 0 285940 0 1635 découvrir avenir amoureux 538.353915 1096.646085 -23.202455 16.256784
5468066 0 1808 0 1460 savoir journée horoscope 480.731937 979.268063 -21.925600 15.362156
2008257 0 22601 0 1433 découvrir succulent recette 471.841688 961.158312 -21.721917 15.219446
2006796 20 17303 1 1222 découvrir recette facile 402.695314 820.304686 -20.017437 14.025203
5011124 117 16269 2 1144 radio one fm 377.341643 768.658357 -19.322325 13.538173
6411919 0 31929 0 1130 visible plate forme 372.073348 757.926652 -19.289203 13.514966
4590935 0 31929 0 1130 plate forme petalert 372.073348 757.926652 -19.289203 13.514966
1302601 0 31929 0 1130 clos visible plate 372.073348 757.926652 -19.289203 13.514966
3364234 40 1512 1 1134 koh lanta terre 373.719690 761.280310 -19.280103 13.508591
570509 12 1486 1 1121 août 2020 savoir 369.439201 752.560799 -19.168774 13.430588
35932 0 2680 0 1103 12 prénom porter 363.183100 739.816900 -19.057363 13.352528
3310539 0 1282 0 1040 juillet 2020 savoir 342.439188 697.560812 -18.505112 12.965594
4885080 0 2946 0 1016 prénom porter femme 334.536745 681.463255 -18.290346 12.815118
5576090 0 324 0 1015 signe horoscope incompatible 334.207476 680.792524 -18.281342 12.808810
5052883 0 13435 0 1013 recette facile découvrir 333.548940 679.451060 -18.263322 12.796184
2560085 0 27536 0 1008 forme petalert france 331.902597 676.097403 -18.218194 12.764565
34334 1325 10434 7 1029 12 coup midi 341.122114 694.877886 -18.090488 12.675088
2361509 0 13155 0 993 facile découvrir recette 326.963571 666.036429 -18.082134 12.669235
2146664 0 2394 0 942 enceint 11 an 310.170880 631.829120 -17.611669 12.339603
2819253 0 1091 0 920 grâce horoscope jour 302.926974 617.073026 -17.404797 12.194659
411004 0 97301 0 917 alert pet alert 301.939168 615.060832 -17.376397 12.174760
4502779 0 18855 0 915 petit fille tuer 301.280631 613.719369 -17.357437 12.161476
3636615 0 18855 0 915 maltraiter propre parent 301.280631 613.719369 -17.357437 12.161476
6159193 0 18855 0 915 tuer maltraiter propre 301.280631 613.719369 -17.357437 12.161476
2471403 0 18855 0 915 fille tuer maltraiter 301.280631 613.719369 -17.357437 12.161476
3673276 0 11506 0 870 marecettech propose découvrir 286.463551 583.536449 -16.925234 11.858653
4497080 0 90169 0 866 pet alert pet 285.146477 580.853523 -16.886281 11.831361
4870637 0 42900 0 846 provoquer trouble santé 278.561108 567.438892 -16.690150 11.693942
6710478 0 583 0 835 étude faire astrologue 274.939156 560.060844 -16.581289 11.617669
5600733 0 263 0 825 site rencontre gratuit 271.646471 553.353529 -16.481701 11.547892
1128688 0 67069 0 818 centre accueil universel 269.341592 548.658408 -16.411630 11.498797
5599790 0 10462 0 810 site marecettech propose 266.707444 543.292556 -16.331180 11.442430
6472205 0 6814 0 802 vouloir célibataire texte 264.073297 537.926703 -16.250332 11.385784
In [82]:
fig = px.bar(graph_nonmedia3, x="res_nonmedia", y="trigramme", labels={"res_nonmedia":"Résiduel de Pearson","trigramme":"Trigramme (dans les posts Facebook <b>non-médias</b>)"}, orientation="h", title="Trigrammes les plus caractéristiques des posts Facebook <b>non-médias</b> {} en 2020".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="Olive", opacity=0.75, textfont_size=12)
fig
In [83]:
khi2_3["exp_inter_media"] = ((khi2_3.interactions_media + khi2_3.interactions_nonmedia) * khi2_3.interactions_media.sum()) / (khi2_3.interactions_media.sum() + khi2_3.interactions_nonmedia.sum())
khi2_3["exp_inter_nonmedia"] = ((khi2_3.interactions_media + khi2_3.interactions_nonmedia) * khi2_3.interactions_nonmedia.sum()) / (khi2_3.interactions_media.sum() + khi2_3.interactions_nonmedia.sum())
khi2_3
Out[83]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia
0 0 5 0 1 #111 disposition signaler 0.329268 0.670732 -0.573819 0.402046 1.321568 3.678432
1 179 0 1 0 #12h45rts chine cloître 0.329268 0.670732 1.168890 -0.818982 47.312136 131.687864
2 5 0 1 0 #19h30rt crèches bernois 0.329268 0.670732 1.168890 -0.818982 1.321568 3.678432
3 10 0 1 0 #19h30rt rtsinfo période 0.329268 0.670732 1.168890 -0.818982 2.643136 7.356864
4 0 1 0 1 #6 ski orientation 0.329268 0.670732 -0.573819 0.402046 0.264314 0.735686
... ... ... ... ... ... ... ... ... ... ... ...
6749553 0 43 0 1 samedi cavelescretet chateauleveque 0.329268 0.670732 -0.573819 0.402046 11.365485 31.634515
6749554 0 30 0 3 sea dahu solo 0.987805 2.012195 -0.993884 0.696364 7.929408 22.070592
6749555 0 0 0 1 sen ira collectif 0.329268 0.670732 -0.573819 0.402046 0.000000 0.000000
6749556 0 0 0 1 sonore des femmes 0.329268 0.670732 -0.573819 0.402046 0.000000 0.000000
6749557 0 0 0 2 spontanes cie sevenacts 0.658537 1.341463 -0.811503 0.568579 0.000000 0.000000

6749558 rows × 11 columns

In [84]:
khi2_3["res_inter_media"] = (khi2_3.interactions_media - khi2_3.exp_inter_media) / np.sqrt(khi2_3.exp_inter_media)
khi2_3["res_inter_nonmedia"] = (khi2_3.interactions_nonmedia - khi2_3.exp_inter_nonmedia) / np.sqrt(khi2_3.exp_inter_nonmedia)
khi2_3
Out[84]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
0 0 5 0 1 #111 disposition signaler 0.329268 0.670732 -0.573819 0.402046 1.321568 3.678432 -1.149595 0.689062
1 179 0 1 0 #12h45rts chine cloître 0.329268 0.670732 1.168890 -0.818982 47.312136 131.687864 19.145181 -11.475533
2 5 0 1 0 #19h30rt crèches bernois 0.329268 0.670732 1.168890 -0.818982 1.321568 3.678432 3.199764 -1.917924
3 10 0 1 0 #19h30rt rtsinfo période 0.329268 0.670732 1.168890 -0.818982 2.643136 7.356864 4.525150 -2.712354
4 0 1 0 1 #6 ski orientation 0.329268 0.670732 -0.573819 0.402046 0.264314 0.735686 -0.514114 0.308158
... ... ... ... ... ... ... ... ... ... ... ... ... ...
6749553 0 43 0 1 samedi cavelescretet chateauleveque 0.329268 0.670732 -0.573819 0.402046 11.365485 31.634515 -3.371274 2.020726
6749554 0 30 0 3 sea dahu solo 0.987805 2.012195 -0.993884 0.696364 7.929408 22.070592 -2.815920 1.687850
6749555 0 0 0 1 sen ira collectif 0.329268 0.670732 -0.573819 0.402046 0.000000 0.000000 NaN NaN
6749556 0 0 0 1 sonore des femmes 0.329268 0.670732 -0.573819 0.402046 0.000000 0.000000 NaN NaN
6749557 0 0 0 2 spontanes cie sevenacts 0.658537 1.341463 -0.811503 0.568579 0.000000 0.000000 NaN NaN

6749558 rows × 13 columns

In [85]:
graph_interactions_media3 = khi2_3.sort_values(by=["res_inter_media"],ascending=False).head(50)
graph_interactions_media3
Out[85]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
1568440 206573 272 2597 7 coronavirus suisse monde 857.415043 1746.584957 59.408706 -41.624668 54671.948378 152173.051622 649.648597 -389.396376
2977509 290704 62285 300 266 hôpital universitaire genève 186.365943 379.634057 8.323869 -5.832113 93299.796398 259689.203602 646.273054 -387.373091
1956122 188166 1052 13 22 durée vie coronavirus 11.524396 23.475604 0.434671 -0.304552 50012.892398 139205.107602 617.759841 -370.282403
6724272 165207 198 2086 4 évolution coronavirus suisse 688.171060 1401.828940 53.285105 -37.334171 43718.792436 121686.207564 581.031992 -348.267899
1463794 159582 0 2 0 connaissance actuel maladie 0.658537 1.341463 1.653060 -1.158215 42179.694293 117402.305707 571.643030 -342.640199
2071319 155893 0 1 0 dérouter scientifique début 0.329268 0.670732 1.168890 -0.818982 41204.641391 114688.358609 564.997164 -338.656697
5892160 155893 0 1 0 symptôme covid-19 dérouter 0.329268 0.670732 1.168890 -0.818982 41204.641391 114688.358609 564.997164 -338.656697
5483905 155893 0 1 0 scientifique début pandémie 0.329268 0.670732 1.168890 -0.818982 41204.641391 114688.358609 564.997164 -338.656697
4322927 155893 0 1 0 pandémie revoir 369 0.329268 0.670732 1.168890 -0.818982 41204.641391 114688.358609 564.997164 -338.656697
202656 155893 0 1 0 369 ici connaissance 0.329268 0.670732 1.168890 -0.818982 41204.641391 114688.358609 564.997164 -338.656697
1623665 155893 0 1 0 covid-19 dérouter scientifique 0.329268 0.670732 1.168890 -0.818982 41204.641391 114688.358609 564.997164 -338.656697
1884105 155893 0 1 0 diversité symptôme covid-19 0.329268 0.670732 1.168890 -0.818982 41204.641391 114688.358609 564.997164 -338.656697
2985005 155893 0 1 0 ici connaissance actuel 0.329268 0.670732 1.168890 -0.818982 41204.641391 114688.358609 564.997164 -338.656697
1967280 155893 0 1 0 début pandémie revoir 0.329268 0.670732 1.168890 -0.818982 41204.641391 114688.358609 564.997164 -338.656697
5217274 155893 0 1 0 revoir 369 ici 0.329268 0.670732 1.168890 -0.818982 41204.641391 114688.358609 564.997164 -338.656697
5597189 134019 37 1464 2 site app évolution 482.707547 983.292453 44.663877 -31.293714 35432.825119 98623.174881 523.736843 -313.925451
5849788 126391 38 1301 3 suivre direct site 429.366058 874.633942 42.064940 -29.472771 33416.905226 93012.094774 508.602915 -304.854244
1849265 126366 37 1298 2 direct site app 428.048985 871.951015 42.048263 -29.461086 33410.033072 92992.966928 508.556044 -304.826150
576934 125325 37 1289 2 app évolution coronavirus 425.085569 865.914431 41.901787 -29.358458 33134.882605 92227.117395 506.455918 -303.567344
4211409 190527 48776 738 389 office fédéral santé 371.085543 755.914457 19.047054 -13.345305 63251.039487 176051.960513 506.072280 -303.337393
2650247 185113 48437 709 380 fédéral santé public 358.573342 730.426658 18.505812 -12.966084 61730.443296 171819.556704 496.596972 -297.657938
5644634 115484 7405 107 33 soin intensif hôpital 46.097583 93.902417 8.970062 -6.284868 32481.235051 90407.764949 460.549476 -276.051234
3138184 100303 750 15 6 intensif hôpital universitaire 6.914637 14.085363 3.074785 -2.154346 26709.683093 74343.316907 450.302139 -269.909028
1196388 98858 0 5 0 check up durée 1.646342 3.353658 2.613718 -1.831299 26129.514722 72728.485278 449.923700 -269.682193
6213791 98858 0 5 0 up durée vie 1.646342 3.353658 2.613718 -1.831299 26129.514722 72728.485278 449.923700 -269.682193
2050986 98385 0 16 0 démêle vrai faux 5.268295 10.731705 4.675560 -3.275928 26004.494385 72380.505615 448.846048 -269.036253
2197425 97148 0 6 0 entendre durée vie 1.975611 4.024389 2.863184 -2.006088 25677.538451 71470.461549 446.015439 -267.339600
395968 97148 0 6 0 air etc entendre 1.975611 4.024389 2.863184 -2.006088 25677.538451 71470.461549 446.015439 -267.339600
3287680 97148 0 6 0 jour plastique heure 1.975611 4.024389 2.863184 -2.006088 25677.538451 71470.461549 446.015439 -267.339600
2277582 97148 0 6 0 etc entendre durée 1.975611 4.024389 2.863184 -2.006088 25677.538451 71470.461549 446.015439 -267.339600
2895549 97148 0 6 0 heure air etc 1.975611 4.024389 2.863184 -2.006088 25677.538451 71470.461549 446.015439 -267.339600
4588973 97148 0 6 0 plastique heure air 1.975611 4.024389 2.863184 -2.006088 25677.538451 71470.461549 446.015439 -267.339600
5147966 94907 0 3 0 reportage soin intensif 0.987805 2.012195 2.024577 -1.418518 25085.211655 69821.788345 440.841106 -264.238128
535696 100097 3673 135 33 annoncer nouveau mesure 55.317100 112.682900 10.713600 -7.506476 27427.823168 76342.176832 438.787827 -263.007402
3997060 92991 0 2 1 médecin aide soignants 0.987805 2.012195 1.018423 -0.713558 24578.786780 68412.213220 436.368527 -261.557285
4998037 92964 0 1 0 quotidien service soin 0.329268 0.670732 1.168890 -0.818982 24571.650313 68392.349687 436.305172 -261.519310
384144 92964 0 1 0 aide soignants raconter 0.329268 0.670732 1.168890 -0.818982 24571.650313 68392.349687 436.305172 -261.519310
1099918 92964 0 1 0 cas grave reportage 0.329268 0.670732 1.168890 -0.818982 24571.650313 68392.349687 436.305172 -261.519310
5641967 92964 0 1 0 soignants raconter réalité 0.329268 0.670732 1.168890 -0.818982 24571.650313 68392.349687 436.305172 -261.519310
2476802 92964 0 1 0 filmer quotidien service 0.329268 0.670732 1.168890 -0.818982 24571.650313 68392.349687 436.305172 -261.519310
5308583 92964 0 1 0 réalité chevet cas 0.329268 0.670732 1.168890 -0.818982 24571.650313 68392.349687 436.305172 -261.519310
2722747 92964 0 1 0 genève médecin aide 0.329268 0.670732 1.168890 -0.818982 24571.650313 68392.349687 436.305172 -261.519310
2796180 92964 0 1 0 grave reportage soin 0.329268 0.670732 1.168890 -0.818982 24571.650313 68392.349687 436.305172 -261.519310
1208937 92964 0 1 0 chevet cas grave 0.329268 0.670732 1.168890 -0.818982 24571.650313 68392.349687 436.305172 -261.519310
5007521 92964 0 1 0 raconter réalité chevet 0.329268 0.670732 1.168890 -0.818982 24571.650313 68392.349687 436.305172 -261.519310
6208827 93394 595 2 1 universitaire genève médecin 0.987805 2.012195 1.018423 -0.713558 24842.571762 69146.428238 434.928865 -260.694358
3003996 90388 0 2 0 il bon raison 0.658537 1.341463 1.653060 -1.158215 23890.778457 66497.221543 430.217774 -257.870552
1196496 89375 0 9 0 checkup check up 2.963416 6.036584 3.506670 -2.456946 23623.028772 65751.971228 427.800205 -256.421472
6494471 88044 0 8 0 vrai faux checkup 2.634148 5.365852 3.306120 -2.316431 23271.227358 64772.772642 424.602790 -254.504956
1557611 87530 0 4 0 coronavirus démêle vrai 1.317074 2.682926 2.337780 -1.637964 23135.370164 64394.629836 423.361562 -253.760970
In [86]:
fig = px.bar(graph_interactions_media3, x="res_inter_media", y="trigramme", labels={"res_inter_media":"Résiduel de Pearson (<b>en fonction des interactions</b>)","trigramme":"Trigramme (dans les posts Facebook <b>médias</b>)"}, orientation="h", title="Trigrammes les plus caractéristiques des posts Facebook <b>médias</b> {} en 2020<br>(en fonction des interactions suscitées)".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="green", opacity=0.75, textfont_size=12)
fig
In [87]:
graph_interactions_nonmedia3 = khi2_3.sort_values(by=["res_inter_nonmedia"],ascending=False).head(50)
graph_interactions_nonmedia3
Out[87]:
interactions_media interactions_nonmedia obs_media obs_nonmedia trigramme exp_media exp_nonmedia res_media res_nonmedia exp_inter_media exp_inter_nonmedia res_inter_media res_inter_nonmedia
5724912 0 585745 0 471 souviens série tv 155.085440 315.914560 -12.453330 8.725417 154820.374689 430924.625311 -393.472203 235.845426
5917743 0 534001 0 425 série tv alffr 139.939091 285.060909 -11.829585 8.288391 141143.731324 392857.268676 -375.691005 225.187458
1299974 0 442567 0 403 clique image regarde 132.695185 270.304815 -11.519340 8.071017 116976.480832 325590.519168 -342.018246 205.004161
2847983 0 343301 0 285 générique détail série 93.841508 191.158492 -9.687183 6.787318 90739.126157 252561.873843 -301.229358 180.555490
2078128 0 313794 0 243 détail série mythique 80.012233 162.987767 -8.944956 6.267277 82940.024507 230853.975493 -287.993098 172.621736
5888558 0 304065 0 190 switzerland suisseromande romandie 62.561005 127.438995 -7.909552 5.541822 80368.517409 223696.482591 -283.493417 169.924648
5838415 0 304065 0 190 suisse switzerland suisseromande 62.561005 127.438995 -7.909552 5.541822 80368.517409 223696.482591 -283.493417 169.924648
3910267 0 304065 0 190 montagne lacleman leman 62.561005 127.438995 -7.909552 5.541822 80368.517409 223696.482591 -283.493417 169.924648
5883983 0 304065 0 190 swiss montagne lacleman 62.561005 127.438995 -7.909552 5.541822 80368.517409 223696.482591 -283.493417 169.924648
3987645 0 304065 0 190 mylausanne iglausann igerslausanne 62.561005 127.438995 -7.909552 5.541822 80368.517409 223696.482591 -283.493417 169.924648
3000575 0 304065 0 190 igerslausanne vaud cantondevaud 62.561005 127.438995 -7.909552 5.541822 80368.517409 223696.482591 -283.493417 169.924648
3418727 0 304065 0 190 lausanneparcoeur lausanne mylausanne 62.561005 127.438995 -7.909552 5.541822 80368.517409 223696.482591 -283.493417 169.924648
3000685 0 304065 0 190 iglausann igerslausanne vaud 62.561005 127.438995 -7.909552 5.541822 80368.517409 223696.482591 -283.493417 169.924648
5843521 0 304065 0 190 suisseromande romandie switzerlandpicture 62.561005 127.438995 -7.909552 5.541822 80368.517409 223696.482591 -283.493417 169.924648
3414017 0 304065 0 190 lausanne mylausanne iglausann 62.561005 127.438995 -7.909552 5.541822 80368.517409 223696.482591 -283.493417 169.924648
1074708 0 301932 0 189 cantondevaud suisse switzerland 62.231737 126.768263 -7.888709 5.527219 79804.736482 222127.263518 -282.497321 169.327593
6275741 0 301932 0 189 vaud cantondevaud suisse 62.231737 126.768263 -7.888709 5.527219 79804.736482 222127.263518 -282.497321 169.327593
6163916 0 292369 0 216 tv alffr découvrir 71.121985 144.878015 -8.433385 5.908845 77277.105442 215091.894558 -277.987599 166.624486
416773 0 287023 0 216 alffr découvrir série 71.121985 144.878015 -8.433385 5.908845 75864.084891 211158.915109 -275.434357 165.094085
1998481 0 285940 0 1635 découvrir avenir amoureux 538.353915 1096.646085 -23.202455 16.256784 75577.833252 210362.166748 -274.914229 164.782322
467945 0 285940 0 1635 amoureux 3 cliquer 538.353915 1096.646085 -23.202455 16.256784 75577.833252 210362.166748 -274.914229 164.782322
755816 0 285940 0 1635 avenir amoureux 3 538.353915 1096.646085 -23.202455 16.256784 75577.833252 210362.166748 -274.914229 164.782322
5258487 0 285180 0 152 romandie switzerlandpicture svizzera 50.048804 101.951196 -7.074518 4.956756 75376.954910 209803.045090 -274.548639 164.563189
5888884 0 285180 0 152 switzerlandpicture svizzera schweiz 50.048804 101.951196 -7.074518 4.956756 75376.954910 209803.045090 -274.548639 164.563189
3989454 0 285180 0 152 myswitzerland swiss montagne 50.048804 101.951196 -7.074518 4.956756 75376.954910 209803.045090 -274.548639 164.563189
5881176 0 285180 0 152 svizzera schweiz myswitzerland 50.048804 101.951196 -7.074518 4.956756 75376.954910 209803.045090 -274.548639 164.563189
5480343 0 285180 0 152 schweiz myswitzerland swiss 50.048804 101.951196 -7.074518 4.956756 75376.954910 209803.045090 -274.548639 164.563189
3771940 0 277847 0 1 meryl streep immense 0.329268 0.670732 -0.573819 0.402046 73438.743218 204408.256782 -270.995836 162.433656
4202867 0 277847 0 1 oeil trouve souvenir 0.329268 0.670732 -0.573819 0.402046 73438.743218 204408.256782 -270.995836 162.433656
1135556 0 277847 0 1 cerne oeil trouve 0.329268 0.670732 -0.573819 0.402046 73438.743218 204408.256782 -270.995836 162.433656
5194859 0 277847 0 1 retire ride front 0.329268 0.670732 -0.573819 0.402046 73438.743218 204408.256782 -270.995836 162.433656
2131073 0 277847 0 1 embrassé cerne oeil 0.329268 0.670732 -0.573819 0.402046 73438.743218 204408.256782 -270.995836 162.433656
6127252 0 277847 0 1 travers étonnement face 0.329268 0.670732 -0.573819 0.402046 73438.743218 204408.256782 -270.995836 162.433656
5224052 0 277847 0 1 ri embrassé cerne 0.329268 0.670732 -0.573819 0.402046 73438.743218 204408.256782 -270.995836 162.433656
3922388 0 277847 0 1 montrer ri embrassé 0.329268 0.670732 -0.573819 0.402046 73438.743218 204408.256782 -270.995836 162.433656
844156 0 277847 0 1 bel meryl streep 0.329268 0.670732 -0.573819 0.402046 73438.743218 204408.256782 -270.995836 162.433656
4598799 0 277847 0 1 pleurer bel meryl 0.329268 0.670732 -0.573819 0.402046 73438.743218 204408.256782 -270.995836 162.433656
5723752 0 277847 0 1 souvenir pleurer bel 0.329268 0.670732 -0.573819 0.402046 73438.743218 204408.256782 -270.995836 162.433656
6145431 0 277847 0 1 trouve souvenir pleurer 0.329268 0.670732 -0.573819 0.402046 73438.743218 204408.256782 -270.995836 162.433656
4185688 0 277847 0 1 obtenir travers étonnement 0.329268 0.670732 -0.573819 0.402046 73438.743218 204408.256782 -270.995836 162.433656
5227715 0 277847 0 1 ride front obtenir 0.329268 0.670732 -0.573819 0.402046 73438.743218 204408.256782 -270.995836 162.433656
6705539 0 277847 0 1 étonnement face beauté 0.329268 0.670732 -0.573819 0.402046 73438.743218 204408.256782 -270.995836 162.433656
6356635 0 277847 0 1 vie bouche montrer 0.329268 0.670732 -0.573819 0.402046 73438.743218 204408.256782 -270.995836 162.433656
2626034 0 277847 0 1 front obtenir travers 0.329268 0.670732 -0.573819 0.402046 73438.743218 204408.256782 -270.995836 162.433656
5781437 0 277847 0 1 streep immense acteur 0.329268 0.670732 -0.573819 0.402046 73438.743218 204408.256782 -270.995836 162.433656
838174 0 277847 0 1 beauté vie bouche 0.329268 0.670732 -0.573819 0.402046 73438.743218 204408.256782 -270.995836 162.433656
949356 0 277847 0 1 bouche montrer ri 0.329268 0.670732 -0.573819 0.402046 73438.743218 204408.256782 -270.995836 162.433656
2351510 0 277847 0 1 face beauté vie 0.329268 0.670732 -0.573819 0.402046 73438.743218 204408.256782 -270.995836 162.433656
3017331 0 266556 0 193 image regarde générique 63.548811 129.451189 -7.971751 5.585402 70454.378263 196101.621737 -265.432436 159.098980
2008582 0 266044 0 213 découvrir série tv 70.134180 142.865820 -8.374615 5.867668 70319.049695 195724.950305 -265.177393 158.946108
In [88]:
fig = px.bar(graph_interactions_nonmedia3, x="res_inter_nonmedia", y="trigramme", labels={"res_inter_nonmedia":"Résiduel de Pearson (<b>en fonction des interactions</b>)","trigramme":"Trigramme (dans les posts Facebook <b>non-médias</b>)"}, orientation="h", title="Trigrammes les plus caractéristiques des posts Facebook <b>non-médias</b> {} en 2020<br>(en fonction des interactions suscitées)".format(pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=1000)
fig.update_traces(marker_color="lightgreen", opacity=0.75, textfont_size=12)
fig

in English

In [89]:
pays = "Romandy (French Switzerland)"
In [90]:
termeMaj = "Lemma"
termeMin = "lemma"
yy = "mot"
media = "media"
source = graph_interactions_media1[:10]
xx= "res_inter_media"
fig = px.bar(source, x=xx, y=yy, labels={xx:"χ<sup>2</sup> residual (<b>weighed by interactions</b>)",yy:"{} (in <b>{}</b> Facebook posts)".format(termeMaj,media)}, orientation="h", title="Most characteristic {}s among <b>{}</b> Facebook posts in {} (2020)".format(termeMin,media,pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=400)
fig.update_traces(marker_color="blue", opacity=0.75, textfont_size=12)
fig
In [91]:
termeMaj = "Lemma"
termeMin = "lemma"
yy = "mot"
media = "non-media"
source = graph_interactions_nonmedia1[:10]
xx= "res_inter_nonmedia"
fig = px.bar(source, x=xx, y=yy, labels={xx:"χ<sup>2</sup> residual (<b>weighed by interactions</b>)",yy:"{} (in <b>{}</b> Facebook posts)".format(termeMaj,media)}, orientation="h", title="Most characteristic {}s among <b>{}</b> Facebook posts in {} (2020)".format(termeMin,media,pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=400)
fig.update_traces(marker_color="blue", opacity=0.75, textfont_size=12)
fig
In [92]:
termeMaj = "Bigram"
termeMin = "bigram"
yy = "bigramme"
media = "media"
source = graph_interactions_media2[:10]
xx= "res_inter_media"
fig = px.bar(source, x=xx, y=yy, labels={xx:"χ<sup>2</sup> residual (<b>weighed by interactions</b>)",yy:"{} (in <b>{}</b> Facebook posts)".format(termeMaj,media)}, orientation="h", title="Most characteristic {}s among <b>{}</b> Facebook posts in {} (2020)".format(termeMin,media,pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=400)
fig.update_traces(marker_color="blue", opacity=0.75, textfont_size=12)
fig
In [93]:
termeMaj = "Bigram"
termeMin = "bigram"
yy = "bigramme"
media = "non-media"
source = graph_interactions_nonmedia2[:10]
xx= "res_inter_nonmedia"
fig = px.bar(source, x=xx, y=yy, labels={xx:"χ<sup>2</sup> residual (<b>weighed by interactions</b>)",yy:"{} (in <b>{}</b> Facebook posts)".format(termeMaj,media)}, orientation="h", title="Most characteristic {}s among <b>{}</b> Facebook posts in {} (2020)".format(termeMin,media,pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=400)
fig.update_traces(marker_color="blue", opacity=0.75, textfont_size=12)
fig
In [94]:
termeMaj = "Trigram"
termeMin = "trigram"
yy = "trigramme"
media = "media"
source = graph_interactions_media3[:10]
xx= "res_inter_media"
fig = px.bar(source, x=xx, y=yy, labels={xx:"χ<sup>2</sup> residual (<b>weighed by interactions</b>)",yy:"{} (in <b>{}</b> Facebook posts)".format(termeMaj,media)}, orientation="h", title="Most characteristic {}s among <b>{}</b> Facebook posts in {} (2020)".format(termeMin,media,pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=400)
fig.update_traces(marker_color="blue", opacity=0.75, textfont_size=12)
fig
In [95]:
termeMaj = "Trigram"
termeMin = "trigram"
yy = "trigramme"
media = "non-media"
source = graph_interactions_nonmedia3[:10]
xx = "res_inter_nonmedia"
fig = px.bar(source, x=xx, y=yy, labels={xx:"χ<sup>2</sup> residual (<b>weighed by interactions</b>)",yy:"{} (in <b>{}</b> Facebook posts)".format(termeMaj,media)}, orientation="h", title="Most characteristic {}s among <b>{}</b> Facebook posts in {} (2020)".format(termeMin,media,pays))
fig.update_layout(yaxis=dict(autorange="reversed"), width=1000, height=400)
fig.update_traces(marker_color="blue", opacity=0.75, textfont_size=12)
fig
In [ ]: